From e4bbffa4263bc5409faead8aa0f0820729cd0440 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 24 Nov 2024 07:14:59 -0500
Subject: [PATCH 1/8] Temp patch to new sqlparser

---
 Cargo.toml                |   4 +
 datafusion-cli/Cargo.lock | 189 +++++++++++++++++++++-----------------
 2 files changed, 107 insertions(+), 86 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 76bc50d59a08..6e2127430a8e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -175,3 +175,7 @@ large_futures = "warn"
 [workspace.lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
 unused_qualifications = "deny"
+
+## Temp patch for sqlparser
+[patch.crates-io]
+sqlparser = { git = "https://github.com/apache/datafusion-sqlparser-rs.git", rev = "4ab3ab91473d152c652e6582b63abb13535703f9" }
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 04b0b0d22cfd..54c6623ba2b1 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -220,7 +220,7 @@ dependencies = [
  "chrono",
  "chrono-tz",
  "half",
- "hashbrown 0.15.1",
+ "hashbrown 0.15.2",
  "num",
 ]
 
@@ -406,9 +406,9 @@ dependencies = [
 
 [[package]]
 name = "async-compression"
-version = "0.4.17"
+version = "0.4.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857"
+checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522"
 dependencies = [
  "bzip2",
  "flate2",
@@ -814,9 +814,9 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.5.4"
+version = "1.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7"
+checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
 dependencies = [
  "arrayref",
  "arrayvec",
@@ -880,9 +880,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.8.0"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
+checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
 
 [[package]]
 name = "bytes-utils"
@@ -917,9 +917,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.1"
+version = "1.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47"
+checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc"
 dependencies = [
  "jobserver",
  "libc",
@@ -1080,6 +1080,16 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "core-foundation"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -1097,9 +1107,9 @@ dependencies = [
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6"
+checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3"
 dependencies = [
  "libc",
 ]
@@ -1700,12 +1710,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "errno"
-version = "0.3.9"
+version = "0.3.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
 dependencies = [
  "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1972,9 +1982,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.1"
+version = "0.15.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
+checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
 
 [[package]]
 name = "heck"
@@ -1988,12 +1998,6 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
-[[package]]
-name = "hermit-abi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
-
 [[package]]
 name = "hex"
 version = "0.4.3"
@@ -2162,8 +2166,8 @@ dependencies = [
  "http 1.1.0",
  "hyper 1.5.1",
  "hyper-util",
- "rustls 0.23.17",
- "rustls-native-certs 0.8.0",
+ "rustls 0.23.19",
+ "rustls-native-certs 0.8.1",
  "rustls-pki-types",
  "tokio",
  "tokio-rustls 0.26.0",
@@ -2358,7 +2362,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.1",
+ "hashbrown 0.15.2",
 ]
 
 [[package]]
@@ -2390,9 +2394,9 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2"
+checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
 
 [[package]]
 name = "jobserver"
@@ -2405,10 +2409,11 @@ dependencies = [
 
 [[package]]
 name = "js-sys"
-version = "0.3.72"
+version = "0.3.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9"
+checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705"
 dependencies = [
+ "once_cell",
  "wasm-bindgen",
 ]
 
@@ -2484,9 +2489,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.164"
+version = "0.2.167"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f"
+checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
 
 [[package]]
 name = "libflate"
@@ -2546,9 +2551,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
 
 [[package]]
 name = "litemap"
-version = "0.7.3"
+version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
+checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
 
 [[package]]
 name = "lock_api"
@@ -2628,11 +2633,10 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec"
+checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
- "hermit-abi",
  "libc",
  "wasi",
  "windows-sys 0.52.0",
@@ -2862,7 +2866,7 @@ dependencies = [
  "flate2",
  "futures",
  "half",
- "hashbrown 0.15.1",
+ "hashbrown 0.15.2",
  "lz4_flex",
  "num",
  "num-bigint",
@@ -3020,9 +3024,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.89"
+version = "1.0.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e"
+checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
 dependencies = [
  "unicode-ident",
 ]
@@ -3063,7 +3067,7 @@ dependencies = [
  "quinn-proto",
  "quinn-udp",
  "rustc-hash",
- "rustls 0.23.17",
+ "rustls 0.23.19",
  "socket2",
  "thiserror 2.0.3",
  "tokio",
@@ -3081,7 +3085,7 @@ dependencies = [
  "rand",
  "ring",
  "rustc-hash",
- "rustls 0.23.17",
+ "rustls 0.23.19",
  "rustls-pki-types",
  "slab",
  "thiserror 2.0.3",
@@ -3259,8 +3263,8 @@ dependencies = [
  "percent-encoding",
  "pin-project-lite",
  "quinn",
- "rustls 0.23.17",
- "rustls-native-certs 0.8.0",
+ "rustls 0.23.19",
+ "rustls-native-certs 0.8.1",
  "rustls-pemfile 2.2.0",
  "rustls-pki-types",
  "serde",
@@ -3378,9 +3382,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.23.17"
+version = "0.23.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f1a745511c54ba6d4465e8d5dfbd81b45791756de28d4981af70d6dca128f1e"
+checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1"
 dependencies = [
  "once_cell",
  "ring",
@@ -3399,20 +3403,19 @@ dependencies = [
  "openssl-probe",
  "rustls-pemfile 1.0.4",
  "schannel",
- "security-framework",
+ "security-framework 2.11.1",
 ]
 
 [[package]]
 name = "rustls-native-certs"
-version = "0.8.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a"
+checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
 dependencies = [
  "openssl-probe",
- "rustls-pemfile 2.2.0",
  "rustls-pki-types",
  "schannel",
- "security-framework",
+ "security-framework 3.0.1",
 ]
 
 [[package]]
@@ -3538,7 +3541,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
 dependencies = [
  "bitflags 2.6.0",
- "core-foundation",
+ "core-foundation 0.9.4",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8"
+dependencies = [
+ "bitflags 2.6.0",
+ "core-foundation 0.10.0",
  "core-foundation-sys",
  "libc",
  "security-framework-sys",
@@ -3686,9 +3702,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
 
 [[package]]
 name = "socket2"
-version = "0.5.7"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
+checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
 dependencies = [
  "libc",
  "windows-sys 0.52.0",
@@ -3801,9 +3817,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
 [[package]]
 name = "syn"
-version = "2.0.87"
+version = "2.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
+checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4009,7 +4025,7 @@ version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
 dependencies = [
- "rustls 0.23.17",
+ "rustls 0.23.19",
  "rustls-pki-types",
  "tokio",
 ]
@@ -4052,9 +4068,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
 
 [[package]]
 name = "tracing"
-version = "0.1.40"
+version = "0.1.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
+checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
 dependencies = [
  "pin-project-lite",
  "tracing-attributes",
@@ -4063,9 +4079,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-attributes"
-version = "0.1.27"
+version = "0.1.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
+checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4074,9 +4090,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-core"
-version = "0.1.32"
+version = "0.1.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
+checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
 dependencies = [
  "once_cell",
 ]
@@ -4155,9 +4171,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 
 [[package]]
 name = "url"
-version = "2.5.3"
+version = "2.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada"
+checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
 dependencies = [
  "form_urlencoded",
  "idna",
@@ -4246,9 +4262,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.95"
+version = "0.2.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e"
+checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -4257,9 +4273,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.95"
+version = "0.2.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358"
+checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd"
 dependencies = [
  "bumpalo",
  "log",
@@ -4272,21 +4288,22 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.45"
+version = "0.4.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b"
+checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d"
 dependencies = [
  "cfg-if",
  "js-sys",
+ "once_cell",
  "wasm-bindgen",
  "web-sys",
 ]
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.95"
+version = "0.2.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56"
+checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -4294,9 +4311,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.95"
+version = "0.2.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68"
+checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4307,9 +4324,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.95"
+version = "0.2.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d"
+checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49"
 
 [[package]]
 name = "wasm-streams"
@@ -4326,9 +4343,9 @@ dependencies = [
 
 [[package]]
 name = "web-sys"
-version = "0.3.72"
+version = "0.3.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112"
+checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -4578,9 +4595,9 @@ dependencies = [
 
 [[package]]
 name = "yoke"
-version = "0.7.4"
+version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
+checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
 dependencies = [
  "serde",
  "stable_deref_trait",
@@ -4590,9 +4607,9 @@ dependencies = [
 
 [[package]]
 name = "yoke-derive"
-version = "0.7.4"
+version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
+checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4623,18 +4640,18 @@ dependencies = [
 
 [[package]]
 name = "zerofrom"
-version = "0.1.4"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
+checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
 dependencies = [
  "zerofrom-derive",
 ]
 
 [[package]]
 name = "zerofrom-derive"
-version = "0.1.4"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
+checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
 dependencies = [
  "proc-macro2",
  "quote",

From 3545647960c898c0ee8f144907f43051b13c66a8 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 24 Nov 2024 07:31:32 -0500
Subject: [PATCH 2/8] Update for new sqlparser API

---
 datafusion/common/src/utils/mod.rs            |   5 +-
 .../user_defined_scalar_functions.rs          |  10 +-
 datafusion/expr/src/expr.rs                   |   2 +
 datafusion/sql/src/expr/function.rs           |   5 +
 datafusion/sql/src/expr/mod.rs                |   4 +-
 datafusion/sql/src/parser.rs                  |  14 +-
 datafusion/sql/src/planner.rs                 |  10 +-
 datafusion/sql/src/select.rs                  |   1 +
 datafusion/sql/src/statement.rs               | 144 +++++++++++++-----
 datafusion/sql/src/unparser/ast.rs            |   3 +
 datafusion/sql/src/unparser/dialect.rs        |   5 +-
 datafusion/sql/src/unparser/expr.rs           |  16 +-
 datafusion/sql/src/unparser/plan.rs           |   9 +-
 datafusion/sql/src/unparser/utils.rs          |  11 +-
 .../test_files/information_schema.slt         |   4 +-
 15 files changed, 182 insertions(+), 61 deletions(-)

diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs
index f3bba8e254d9..c4a43446e1e5 100644
--- a/datafusion/common/src/utils/mod.rs
+++ b/datafusion/common/src/utils/mod.rs
@@ -775,10 +775,10 @@ pub fn get_available_parallelism() -> usize {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::ScalarValue::Null;
     use arrow::array::Float64Array;
-
-    use super::*;
+    use sqlparser::tokenizer::Span;
 
     #[test]
     fn test_bisect_linear_left_and_right() -> Result<()> {
@@ -1006,6 +1006,7 @@ mod tests {
             let expected_parsed = vec![Ident {
                 value: identifier.to_string(),
                 quote_style,
+                span: Span::empty(),
             }];
 
             assert_eq!(
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index a59394f90814..79577eaa1d50 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -27,10 +27,6 @@ use arrow_array::{
     Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray,
 };
 use arrow_schema::{DataType, Field, Schema};
-use parking_lot::Mutex;
-use regex::Regex;
-use sqlparser::ast::Ident;
-
 use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState};
 use datafusion::prelude::*;
 use datafusion::{execution::registry::FunctionRegistry, test_util};
@@ -48,6 +44,10 @@ use datafusion_expr::{
     Volatility,
 };
 use datafusion_functions_nested::range::range_udf;
+use parking_lot::Mutex;
+use regex::Regex;
+use sqlparser::ast::Ident;
+use sqlparser::tokenizer::Span;
 
 /// test that casting happens on udfs.
 /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and
@@ -1187,6 +1187,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<(
             name: Some(Ident {
                 value: "name".into(),
                 quote_style: None,
+                span: Span::empty(),
             }),
             data_type: DataType::Utf8,
             default_expr: None,
@@ -1196,6 +1197,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<(
             language: Some(Ident {
                 value: "plrust".into(),
                 quote_style: None,
+                span: Span::empty(),
             }),
             behavior: None,
             function_body: Some(lit(body)),
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index c495b5396f53..d0e98327f34b 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -222,6 +222,8 @@ use sqlparser::ast::{
 /// // to 42 = 5 AND b = 6
 /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
 #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
+// TODO make the enum smaller with more boxing (looks like Wildcard is now bigger)
+#[allow(clippy::large_enum_variant)]
 pub enum Expr {
     /// An expression with a specific name.
     Alias(Alias),
diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs
index 67fa23b86990..15b415e3dc42 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -169,6 +169,11 @@ impl FunctionArgs {
                         "Calling {name}: SEPARATOR not supported in function arguments: {sep}"
                     )
                 }
+                FunctionArgumentClause::JsonNullClause(jn) => {
+                    return not_impl_err!(
+                        "Calling {name}: JSON NULL clause not supported in function arguments: {jn}"
+                    )
+                }
             }
         }
 
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 57ac96951f1f..0195f3d32032 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -565,11 +565,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 }
                 not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
             }
-            SQLExpr::Wildcard => Ok(Expr::Wildcard {
+            SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
                 qualifier: None,
                 options: WildcardOptions::default(),
             }),
-            SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard {
+            SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard {
                 qualifier: Some(self.object_name_to_table_reference(object_name)?),
                 options: WildcardOptions::default(),
             }),
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index bd1ed3145ef5..fb83ecea88b8 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -20,6 +20,7 @@
 use std::collections::VecDeque;
 use std::fmt;
 
+use sqlparser::tokenizer::TokenWithSpan;
 use sqlparser::{
     ast::{
         ColumnDef, ColumnOptionDef, Expr, ObjectName, OrderByExpr, Query,
@@ -27,7 +28,7 @@ use sqlparser::{
     },
     dialect::{keywords::Keyword, Dialect, GenericDialect},
     parser::{Parser, ParserError},
-    tokenizer::{Token, TokenWithLocation, Tokenizer, Word},
+    tokenizer::{Token, Tokenizer, Word},
 };
 
 // Use `Parser::expected` instead, if possible
@@ -337,7 +338,7 @@ impl<'a> DFParser<'a> {
     fn expected<T>(
         &self,
         expected: &str,
-        found: TokenWithLocation,
+        found: TokenWithSpan,
     ) -> Result<T, ParserError> {
         parser_err!(format!("Expected {expected}, found: {found}"))
     }
@@ -875,6 +876,7 @@ mod tests {
     use super::*;
     use sqlparser::ast::Expr::Identifier;
     use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident};
+    use sqlparser::tokenizer::Span;
 
     fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> {
         let statements = DFParser::parse_sql(sql)?;
@@ -910,6 +912,7 @@ mod tests {
             name: Ident {
                 value: name.into(),
                 quote_style: None,
+                span: Span::empty(),
             },
             data_type,
             collation: None,
@@ -1218,6 +1221,7 @@ mod tests {
                     expr: Identifier(Ident {
                         value: "c1".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     }),
                     asc,
                     nulls_first,
@@ -1249,6 +1253,7 @@ mod tests {
                     expr: Identifier(Ident {
                         value: "c1".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     }),
                     asc: Some(true),
                     nulls_first: None,
@@ -1258,6 +1263,7 @@ mod tests {
                     expr: Identifier(Ident {
                         value: "c2".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     }),
                     asc: Some(false),
                     nulls_first: Some(true),
@@ -1289,11 +1295,13 @@ mod tests {
                     left: Box::new(Identifier(Ident {
                         value: "c1".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     })),
                     op: BinaryOperator::Minus,
                     right: Box::new(Identifier(Ident {
                         value: "c2".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     })),
                 },
                 asc: Some(true),
@@ -1334,11 +1342,13 @@ mod tests {
                     left: Box::new(Identifier(Ident {
                         value: "c1".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     })),
                     op: BinaryOperator::Minus,
                     right: Box::new(Identifier(Ident {
                         value: "c2".to_owned(),
                         quote_style: None,
+                        span: Span::empty(),
                     })),
                 },
                 asc: Some(true),
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 382c503154dd..6ef2eb4e7fb2 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -339,7 +339,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         plan: LogicalPlan,
         alias: TableAlias,
     ) -> Result<LogicalPlan> {
-        let plan = self.apply_expr_alias(plan, alias.columns)?;
+        let idents = alias.columns.into_iter().map(|c| c.name).collect();
+        let plan = self.apply_expr_alias(plan, idents)?;
 
         LogicalPlanBuilder::from(plan)
             .alias(TableReference::bare(
@@ -586,6 +587,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             | SQLDataType::Nullable(_)
             | SQLDataType::LowCardinality(_)
             | SQLDataType::Trigger
+            // MySQL datatypes
+            | SQLDataType::TinyBlob
+            | SQLDataType::MediumBlob
+            | SQLDataType::LongBlob
+            | SQLDataType::TinyText
+            | SQLDataType::MediumText
+            | SQLDataType::LongText
             => not_impl_err!(
                 "Unsupported SQL type {sql_type:?}"
             ),
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 39b6eb6e8132..9899d92bde71 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -654,6 +654,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             opt_rename,
             opt_replace: _opt_replace,
             opt_ilike: _opt_ilike,
+            wildcard_token: _wildcard_token,
         } = options;
 
         if opt_rename.is_some() {
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 38695f98b5fe..47a842643b8d 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -54,13 +54,13 @@ use datafusion_expr::{
     TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart,
     Volatility, WriteOp,
 };
-use sqlparser::ast::{self, SqliteOnConflict};
+use sqlparser::ast::{self, ShowStatementIn, ShowStatementOptions, SqliteOnConflict};
 use sqlparser::ast::{
     Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable,
     CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert,
     ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr,
-    ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor,
-    TableWithJoins, TransactionMode, UnaryOperator, Value,
+    ShowCreateObject, Statement, TableConstraint, TableFactor, TableWithJoins,
+    TransactionMode, UnaryOperator, Value,
 };
 use sqlparser::parser::ParserError::ParserError;
 
@@ -685,19 +685,99 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             Statement::ShowTables {
                 extended,
                 full,
-                db_name,
-                filter,
-                // SHOW TABLES IN/FROM are equivalent, this field specifies which the user
-                // specified, but it doesn't affect the plan so ignore the field
-                clause: _,
-            } => self.show_tables_to_plan(extended, full, db_name, filter),
+                terse,
+                history,
+                external,
+                show_options,
+            } => {
+                // We only support the basic "SHOW TABLES"
+                // https://github.com/apache/datafusion/issues/3188
+                if extended {
+                    return not_impl_err!("SHOW TABLES EXTENDED not supported")?;
+                }
+                if full {
+                    return not_impl_err!("SHOW FULL TABLES not supported")?;
+                }
+                if terse {
+                    return not_impl_err!("SHOW TERSE TABLES not supported")?;
+                }
+                if history {
+                    return not_impl_err!("SHOW TABLES HISTORY not supported")?;
+                }
+                if external {
+                    return not_impl_err!("SHOW EXTERNAL TABLES not supported")?;
+                }
+                let ShowStatementOptions {
+                    show_in,
+                    starts_with,
+                    limit,
+                    limit_from,
+                    filter_position,
+                } = show_options;
+                if show_in.is_some() {
+                    return not_impl_err!("SHOW TABLES IN not supported")?;
+                }
+                if starts_with.is_some() {
+                    return not_impl_err!("SHOW TABLES LIKE not supported")?;
+                }
+                if limit.is_some() {
+                    return not_impl_err!("SHOW TABLES LIMIT not supported")?;
+                }
+                if limit_from.is_some() {
+                    return not_impl_err!("SHOW TABLES LIMIT FROM not supported")?;
+                }
+                if filter_position.is_some() {
+                    return not_impl_err!("SHOW TABLES FILTER not supported")?;
+                }
+                self.show_tables_to_plan()
+            }
 
             Statement::ShowColumns {
                 extended,
                 full,
-                table_name,
-                filter,
-            } => self.show_columns_to_plan(extended, full, table_name, filter),
+                show_options,
+            } => {
+                let ShowStatementOptions {
+                    show_in,
+                    starts_with,
+                    limit,
+                    limit_from,
+                    filter_position,
+                } = show_options;
+                if starts_with.is_some() {
+                    return not_impl_err!("SHOW COLUMNS LIKE not supported")?;
+                }
+                if limit.is_some() {
+                    return not_impl_err!("SHOW COLUMNS LIMIT not supported")?;
+                }
+                if limit_from.is_some() {
+                    return not_impl_err!("SHOW COLUMNS LIMIT FROM not supported")?;
+                }
+                if filter_position.is_some() {
+                    return not_impl_err!(
+                        "SHOW COLUMNS with WHERE or LIKE is not supported"
+                    )?;
+                }
+                let Some(ShowStatementIn {
+                    // specifies if the syntax was `SHOW COLUMNS IN` or `SHOW
+                    // COLUMNS FROM` which is not different in DataFusion
+                    clause: _,
+                    parent_type,
+                    parent_name,
+                }) = show_in
+                else {
+                    return plan_err!("SHOW COLUMNS requires a table name");
+                };
+
+                if let Some(parent_type) = parent_type {
+                    return not_impl_err!("SHOW COLUMNS IN {parent_type} not supported");
+                }
+                let Some(table_name) = parent_name else {
+                    return plan_err!("SHOW COLUMNS requires a table name");
+                };
+
+                self.show_columns_to_plan(extended, full, table_name)
+            }
 
             Statement::Insert(Insert {
                 or,
@@ -766,10 +846,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 from,
                 selection,
                 returning,
+                or,
             } => {
                 if returning.is_some() {
                     plan_err!("Update-returning clause not yet supported")?;
                 }
+                if or.is_some() {
+                    plan_err!("ON conflict not supported")?;
+                }
                 self.update_to_plan(table, assignments, from, selection)
             }
 
@@ -1065,24 +1149,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
     }
 
     /// Generate a logical plan from a "SHOW TABLES" query
-    fn show_tables_to_plan(
-        &self,
-        extended: bool,
-        full: bool,
-        db_name: Option<Ident>,
-        filter: Option<ShowStatementFilter>,
-    ) -> Result<LogicalPlan> {
+    fn show_tables_to_plan(&self) -> Result<LogicalPlan> {
         if self.has_table("information_schema", "tables") {
-            // We only support the basic "SHOW TABLES"
-            // https://github.com/apache/datafusion/issues/3188
-            if db_name.is_some() || filter.is_some() || full || extended {
-                plan_err!("Unsupported parameters to SHOW TABLES")
-            } else {
-                let query = "SELECT * FROM information_schema.tables;";
-                let mut rewrite = DFParser::parse_sql(query)?;
-                assert_eq!(rewrite.len(), 1);
-                self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1
-            }
+            let query = "SELECT * FROM information_schema.tables;";
+            let mut rewrite = DFParser::parse_sql(query)?;
+            assert_eq!(rewrite.len(), 1);
+            self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1
         } else {
             plan_err!("SHOW TABLES is not supported unless information_schema is enabled")
         }
@@ -1842,22 +1914,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         extended: bool,
         full: bool,
         sql_table_name: ObjectName,
-        filter: Option<ShowStatementFilter>,
     ) -> Result<LogicalPlan> {
-        if filter.is_some() {
-            return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported");
-        }
+        // Figure out the where clause
+        let where_clause = object_name_to_qualifier(
+            &sql_table_name,
+            self.options.enable_ident_normalization,
+        );
 
         if !self.has_table("information_schema", "columns") {
             return plan_err!(
                 "SHOW COLUMNS is not supported unless information_schema is enabled"
             );
         }
-        // Figure out the where clause
-        let where_clause = object_name_to_qualifier(
-            &sql_table_name,
-            self.options.enable_ident_normalization,
-        );
 
         // Do a table lookup to verify the table exists
         let table_ref = self.object_name_to_table_reference(sql_table_name)?;
diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs
index cc0812cd71e1..8812bbcbe0b7 100644
--- a/datafusion/sql/src/unparser/ast.rs
+++ b/datafusion/sql/src/unparser/ast.rs
@@ -24,6 +24,7 @@
 use core::fmt;
 
 use sqlparser::ast;
+use sqlparser::ast::helpers::attached_token::AttachedToken;
 
 #[derive(Clone)]
 pub(super) struct QueryBuilder {
@@ -268,6 +269,7 @@ impl SelectBuilder {
             connect_by: None,
             window_before_qualify: false,
             prewhere: None,
+            select_token: AttachedToken::empty(),
         })
     }
     fn create_empty() -> Self {
@@ -458,6 +460,7 @@ impl TableRelationBuilder {
             version: self.version.clone(),
             partitions: self.partitions.clone(),
             with_ordinality: false,
+            json_path: None,
         })
     }
     fn create_empty() -> Self {
diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs
index e979d8fd4ebd..2657d9970f1a 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -18,15 +18,15 @@
 use std::sync::Arc;
 
 use arrow_schema::TimeUnit;
+use datafusion_common::Result;
 use datafusion_expr::Expr;
 use regex::Regex;
+use sqlparser::tokenizer::Span;
 use sqlparser::{
     ast::{self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo},
     keywords::ALL_KEYWORDS,
 };
 
-use datafusion_common::Result;
-
 use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser};
 
 /// `Dialect` to use for Unparsing
@@ -279,6 +279,7 @@ impl PostgreSqlDialect {
             name: ObjectName(vec![Ident {
                 value: func_name.to_string(),
                 quote_style: None,
+                span: Span::empty(),
             }]),
             args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                 duplicate_treatment: None,
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index 48dfc425ee63..88b2e79558ca 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -43,6 +43,8 @@ use datafusion_expr::{
     expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction},
     Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast,
 };
+use sqlparser::ast::helpers::attached_token::AttachedToken;
+use sqlparser::tokenizer::Span;
 
 /// Convert a DataFusion [`Expr`] to [`ast::Expr`]
 ///
@@ -233,6 +235,7 @@ impl Unparser<'_> {
                     name: ObjectName(vec![Ident {
                         value: func_name.to_string(),
                         quote_style: None,
+                        span: Span::empty(),
                     }]),
                     args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                         duplicate_treatment: None,
@@ -278,6 +281,7 @@ impl Unparser<'_> {
                     name: ObjectName(vec![Ident {
                         value: func_name.to_string(),
                         quote_style: None,
+                        span: Span::empty(),
                     }]),
                     args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                         duplicate_treatment: agg
@@ -404,12 +408,16 @@ impl Unparser<'_> {
             }
             // TODO: unparsing wildcard addition options
             Expr::Wildcard { qualifier, .. } => {
+                let attached_token = AttachedToken::empty();
                 if let Some(qualifier) = qualifier {
                     let idents: Vec<Ident> =
                         qualifier.to_vec().into_iter().map(Ident::new).collect();
-                    Ok(ast::Expr::QualifiedWildcard(ObjectName(idents)))
+                    Ok(ast::Expr::QualifiedWildcard(
+                        ObjectName(idents),
+                        attached_token,
+                    ))
                 } else {
-                    Ok(ast::Expr::Wildcard)
+                    Ok(ast::Expr::Wildcard(attached_token))
                 }
             }
             Expr::GroupingSet(grouping_set) => match grouping_set {
@@ -480,6 +488,7 @@ impl Unparser<'_> {
             name: ObjectName(vec![Ident {
                 value: func_name.to_string(),
                 quote_style: None,
+                span: Span::empty(),
             }]),
             args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                 duplicate_treatment: None,
@@ -709,6 +718,7 @@ impl Unparser<'_> {
         Ident {
             value: ident,
             quote_style,
+            span: Span::empty(),
         }
     }
 
@@ -716,6 +726,7 @@ impl Unparser<'_> {
         Ident {
             value: str,
             quote_style: None,
+            span: Span::empty(),
         }
     }
 
@@ -1481,6 +1492,7 @@ impl Unparser<'_> {
             name: ObjectName(vec![Ident {
                 value: "UNNEST".to_string(),
                 quote_style: None,
+                span: Span::empty(),
             }]),
             args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                 duplicate_treatment: None,
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 81e47ed939f2..1a1d4c42b0ff 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -42,7 +42,7 @@ use datafusion_expr::{
     expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan,
     LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan,
 };
-use sqlparser::ast::{self, Ident, SetExpr};
+use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef};
 use std::sync::Arc;
 
 /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`]
@@ -1006,6 +1006,13 @@ impl Unparser<'_> {
     }
 
     fn new_table_alias(&self, alias: String, columns: Vec<Ident>) -> ast::TableAlias {
+        let columns = columns
+            .into_iter()
+            .map(|ident| TableAliasColumnDef {
+                name: ident,
+                data_type: None,
+            })
+            .collect();
         ast::TableAlias {
             name: self.new_ident_quoted_if_needs(alias),
             columns,
diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs
index c77af7924384..38dcbe981efa 100644
--- a/datafusion/sql/src/unparser/utils.rs
+++ b/datafusion/sql/src/unparser/utils.rs
@@ -17,6 +17,10 @@
 
 use std::{cmp::Ordering, sync::Arc, vec};
 
+use super::{
+    dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle,
+    rewrite::TableAliasRewriter, Unparser,
+};
 use datafusion_common::{
     internal_err,
     tree_node::{Transformed, TransformedResult, TreeNode},
@@ -28,11 +32,7 @@ use datafusion_expr::{
 };
 use indexmap::IndexSet;
 use sqlparser::ast;
-
-use super::{
-    dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle,
-    rewrite::TableAliasRewriter, Unparser,
-};
+use sqlparser::tokenizer::Span;
 
 /// Recursively searches children of [LogicalPlan] to find an Aggregate node if exists
 /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor).
@@ -425,6 +425,7 @@ pub(crate) fn date_part_to_sql(
                     name: ast::ObjectName(vec![ast::Ident {
                         value: "strftime".to_string(),
                         quote_style: None,
+                        span: Span::empty(),
                     }]),
                     args: ast::FunctionArguments::List(ast::FunctionArgumentList {
                         duplicate_treatment: None,
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index 4d51a61c8a52..95098038ae2f 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -483,10 +483,10 @@ set datafusion.catalog.information_schema = true;
 statement ok
 CREATE TABLE t AS SELECT 1::int as i;
 
-statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported
+statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported
 SHOW COLUMNS FROM t LIKE 'f';
 
-statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported
+statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported
 SHOW COLUMNS FROM t WHERE column_name = 'bar';
 
 query TTTTTT

From 34c388a370642b22bfb46f52dbede005799d3490 Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Thu, 5 Dec 2024 15:08:22 +0100
Subject: [PATCH 3/8] feat: init diagnostics

---
 Cargo.toml                                    |  1 +
 datafusion/common/Cargo.toml                  |  1 +
 datafusion/common/src/column.rs               | 33 ++++++-
 datafusion/common/src/dfschema.rs             |  2 +
 datafusion/common/src/diagnostic.rs           | 21 ++++
 datafusion/common/src/error.rs                | 42 ++++++++
 datafusion/common/src/lib.rs                  |  4 +
 datafusion/common/src/with_span.rs            | 96 +++++++++++++++++++
 .../core/tests/execution/logical_plan.rs      |  2 +
 datafusion/expr-common/Cargo.toml             |  1 +
 .../expr-common/src/type_coercion/binary.rs   | 87 ++++++++++++-----
 datafusion/expr/src/expr.rs                   | 10 +-
 datafusion/expr/src/expr_rewriter/mod.rs      |  5 +-
 datafusion/expr/src/expr_rewriter/order_by.rs |  3 +
 datafusion/expr/src/expr_schema.rs            | 25 ++++-
 datafusion/expr/src/logical_plan/builder.rs   |  2 +
 .../src/analyzer/expand_wildcard_rule.rs      | 20 ++--
 .../optimizer/src/analyzer/type_coercion.rs   |  1 +
 datafusion/optimizer/src/decorrelate.rs       |  2 +-
 datafusion/sql/src/expr/identifier.rs         |  4 +
 datafusion/sql/src/parser.rs                  |  4 +-
 datafusion/sql/src/unparser/expr.rs           |  2 +
 datafusion/sql/src/unparser/plan.rs           |  2 +
 23 files changed, 326 insertions(+), 44 deletions(-)
 create mode 100644 datafusion/common/src/diagnostic.rs
 create mode 100644 datafusion/common/src/with_span.rs

diff --git a/Cargo.toml b/Cargo.toml
index 6e2127430a8e..99c441751721 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -149,6 +149,7 @@ sqlparser = { version = "0.52.0", features = ["visitor"] }
 tempfile = "3"
 tokio = { version = "1.36", features = ["macros", "rt", "sync"] }
 url = "2.2"
+derivative = "2.2.0"
 
 [profile.release]
 codegen-units = 1
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index d76848dfe95e..38448b7c3626 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -65,6 +65,7 @@ pyo3 = { version = "0.22.0", optional = true }
 recursive = { workspace = true }
 sqlparser = { workspace = true }
 tokio = { workspace = true }
+derivative = { workspace = true }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
 web-time = "1.1.0"
diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index d940bcf3146e..fe29a24147c3 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -18,22 +18,35 @@
 //! Column
 
 use arrow_schema::{Field, FieldRef};
+use sqlparser::tokenizer::Span;
 
 use crate::error::_schema_err;
 use crate::utils::{parse_identifiers_normalized, quote_identifier};
 use crate::{DFSchema, Result, SchemaError, TableReference};
+use derivative::Derivative;
 use std::collections::HashSet;
 use std::convert::Infallible;
 use std::fmt;
 use std::str::FromStr;
 
 /// A named reference to a qualified field in a schema.
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Debug, Clone, Derivative)]
+#[derivative(PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct Column {
     /// relation/table reference.
     pub relation: Option<TableReference>,
     /// field/column name.
     pub name: String,
+    #[derivative(
+        PartialEq = "ignore",
+        Hash = "ignore",
+        PartialOrd = "ignore",
+        Ord = "ignore"
+    )]
+    /// The location in the source code where this column originally came from.
+    /// Does not change as the [`Column`] undergoes normalization and other
+    /// transformations.
+    pub span: Span,
 }
 
 impl Column {
@@ -50,6 +63,7 @@ impl Column {
         Self {
             relation: relation.map(|r| r.into()),
             name: name.into(),
+            span: Span::empty(),
         }
     }
 
@@ -58,6 +72,7 @@ impl Column {
         Self {
             relation: None,
             name: name.into(),
+            span: Span::empty(),
         }
     }
 
@@ -68,6 +83,7 @@ impl Column {
         Self {
             relation: None,
             name: name.into(),
+            span: Span::empty(),
         }
     }
 
@@ -99,7 +115,11 @@ impl Column {
             // identifiers will be treated as an unqualified column name
             _ => return None,
         };
-        Some(Self { relation, name })
+        Some(Self {
+            relation,
+            name,
+            span: Span::empty(),
+        })
     }
 
     /// Deserialize a fully qualified name string into a column
@@ -113,6 +133,7 @@ impl Column {
             Self {
                 relation: None,
                 name: flat_name,
+                span: Span::empty(),
             },
         )
     }
@@ -124,6 +145,7 @@ impl Column {
             Self {
                 relation: None,
                 name: flat_name,
+                span: Span::empty(),
             },
         )
     }
@@ -254,6 +276,13 @@ impl Column {
                 .collect(),
         })
     }
+
+    /// Attaches a [`Span`] to the [`Column`], i.e. its location in the source
+    /// SQL query.
+    pub fn with_span(mut self, span: Span) -> Self {
+        self.span = span;
+        self
+    }
 }
 
 impl From<&str> for Column {
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 45620c3cacc8..b53b475edaa0 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -32,6 +32,7 @@ use crate::{
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef};
 use arrow_schema::SchemaBuilder;
+use sqlparser::tokenizer::Span;
 
 /// A reference-counted reference to a [DFSchema].
 pub type DFSchemaRef = Arc<DFSchema>;
@@ -505,6 +506,7 @@ impl DFSchema {
                         field: Column {
                             relation: None,
                             name: name.to_string(),
+                            span: Span::empty(),
                         },
                     })
                 }
diff --git a/datafusion/common/src/diagnostic.rs b/datafusion/common/src/diagnostic.rs
new file mode 100644
index 000000000000..1ebf3f9ed566
--- /dev/null
+++ b/datafusion/common/src/diagnostic.rs
@@ -0,0 +1,21 @@
+use sqlparser::tokenizer::Span;
+
+#[derive(Debug, Clone)]
+pub struct Diagnostic {
+    pub entries: Vec<DiagnosticEntry>,
+}
+
+#[derive(Debug, Clone)]
+pub struct DiagnosticEntry {
+    pub span: Span,
+    pub message: String,
+    pub kind: DiagnosticEntryKind,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DiagnosticEntryKind {
+    Error,
+    Warning,
+    Note,
+    Help,
+}
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 4fac7298c455..39916e377aae 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -26,6 +26,7 @@ use std::io;
 use std::result;
 use std::sync::Arc;
 
+use crate::diagnostic::Diagnostic;
 use crate::utils::quote_identifier;
 use crate::{Column, DFSchema, TableReference};
 #[cfg(feature = "avro")]
@@ -131,6 +132,7 @@ pub enum DataFusionError {
     /// Errors from either mapping LogicalPlans to/from Substrait plans
     /// or serializing/deserializing protobytes to Substrait plans
     Substrait(String),
+    Diagnostic(Diagnostic, Box<DataFusionError>),
 }
 
 #[macro_export]
@@ -316,6 +318,7 @@ impl Error for DataFusionError {
             DataFusionError::External(e) => Some(e.as_ref()),
             DataFusionError::Context(_, e) => Some(e.as_ref()),
             DataFusionError::Substrait(_) => None,
+            DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
         }
     }
 }
@@ -372,6 +375,11 @@ impl DataFusionError {
         Self::Context(description.into(), Box::new(self))
     }
 
+    /// wraps self in Self::Diagnostic with a [`Diagnostic`]
+    pub fn with_diagnostic(self, diag: Diagnostic) -> Self {
+        Self::Diagnostic(diag, Box::new(self))
+    }
+
     /// Strips backtrace out of the error message
     /// If backtrace enabled then error has a format "message" [`Self::BACK_TRACE_SEP`] "backtrace"
     /// The method strips the backtrace and outputs "message"
@@ -429,6 +437,7 @@ impl DataFusionError {
             DataFusionError::External(_) => "External error: ",
             DataFusionError::Context(_, _) => "",
             DataFusionError::Substrait(_) => "Substrait error: ",
+            DataFusionError::Diagnostic(_, _) => "",
         }
     }
 
@@ -469,7 +478,40 @@ impl DataFusionError {
                 Cow::Owned(format!("{desc}\ncaused by\n{}", *err))
             }
             DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
+            DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
+        }
+    }
+
+    pub fn get_diagnostics(&self) -> impl Iterator<Item = &Diagnostic> + '_ {
+        struct DiagnosticsIterator<'a> {
+            head: &'a DataFusionError,
         }
+
+        impl<'a> Iterator for DiagnosticsIterator<'a> {
+            type Item = &'a Diagnostic;
+
+            fn next(&mut self) -> Option<Self::Item> {
+                loop {
+                    if let DataFusionError::Diagnostic(diagnostics, source) = self.head {
+                        self.head = source.as_ref();
+                        return Some(diagnostics);
+                    }
+
+                    if let Some(source) = self
+                        .head
+                        .source()
+                        .map(|source| source.downcast_ref::<DataFusionError>())
+                        .flatten()
+                    {
+                        self.head = source;
+                    } else {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        DiagnosticsIterator { head: self }
     }
 }
 
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index 77e8cd60ede2..8eed7e18005a 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -47,6 +47,8 @@ pub mod test_util;
 pub mod tree_node;
 pub mod types;
 pub mod utils;
+pub mod diagnostic;
+pub mod with_span;
 
 /// Reexport arrow crate
 pub use arrow;
@@ -76,6 +78,8 @@ pub use stats::{ColumnStatistics, Statistics};
 pub use table_reference::{ResolvedTableReference, TableReference};
 pub use unnest::{RecursionUnnestOption, UnnestOptions};
 pub use utils::project_schema;
+pub use diagnostic::{Diagnostic, DiagnosticEntry, DiagnosticEntryKind};
+pub use with_span::WithSpan;
 
 // These are hidden from docs purely to avoid polluting the public view of what this crate exports.
 // These are just re-exports of macros by the same name, which gets around the 'cannot refer to
diff --git a/datafusion/common/src/with_span.rs b/datafusion/common/src/with_span.rs
new file mode 100644
index 000000000000..d2acddaa0d2b
--- /dev/null
+++ b/datafusion/common/src/with_span.rs
@@ -0,0 +1,96 @@
+use std::{cmp::Ordering, fmt::{self, Debug, Display}, ops::{Deref, DerefMut}};
+
+use sqlparser::tokenizer::Span;
+
+/// A wrapper type for entitites that somehow refer to a location in the source
+/// code. For example, a [`DataType`](arrow_schema::DataType) can be tied to an
+/// expression which is being planned for execution, and the expression is in
+/// turn located somewhere in the source code.
+pub struct WithSpan<T> {
+    pub span: Span,
+    pub inner: T,
+}
+
+impl<T> WithSpan<T> {
+    pub fn into_inner(self) -> T {
+        self.inner
+    }
+
+    pub fn new(inner: T, span: Span) -> Self {
+        Self { span, inner }
+    }
+
+    pub fn new_without_span(inner: T) -> Self {
+        Self {
+            span: Span::empty(),
+            inner,
+        }
+    }
+}
+
+impl<T> From<T> for WithSpan<T> {
+    fn from(inner: T) -> Self {
+        Self::new_without_span(inner)
+    }
+}
+
+impl<T> Deref for WithSpan<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<T> DerefMut for WithSpan<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<T> AsRef<T> for WithSpan<T> {
+    fn as_ref(&self) -> &T {
+        &self.inner
+    }
+}
+
+impl<T: PartialEq> PartialEq for WithSpan<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.inner.eq(&other.inner)
+    }
+}
+
+impl<T: Eq> Eq for WithSpan<T> {}
+
+impl<T: PartialOrd> PartialOrd for WithSpan<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.inner.partial_cmp(&other.inner)
+    }
+}
+
+impl<T: Ord> Ord for WithSpan<T> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.inner.cmp(&other.inner)
+    }
+}
+
+impl<T: Debug> Debug for WithSpan<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+impl<T: Display> Display for WithSpan<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+impl<T: Clone> Clone for WithSpan<T> {
+    fn clone(&self) -> Self {
+        Self {
+            span: self.span,
+            inner: self.inner.clone(),
+        }
+    }
+}
diff --git a/datafusion/core/tests/execution/logical_plan.rs b/datafusion/core/tests/execution/logical_plan.rs
index 168bf484e541..b405783d17a9 100644
--- a/datafusion/core/tests/execution/logical_plan.rs
+++ b/datafusion/core/tests/execution/logical_plan.rs
@@ -25,6 +25,7 @@ use datafusion_expr::logical_plan::{LogicalPlan, Values};
 use datafusion_expr::{Aggregate, AggregateUDF, Expr};
 use datafusion_functions_aggregate::count::Count;
 use datafusion_physical_plan::collect;
+use sqlparser::tokenizer::Span;
 use std::collections::HashMap;
 use std::fmt::Debug;
 use std::ops::Deref;
@@ -51,6 +52,7 @@ async fn count_only_nulls() -> Result<()> {
     let input_col_ref = Expr::Column(Column {
         relation: None,
         name: "col".to_string(),
+        span: Span::empty(),
     });
 
     // Aggregation: count(col) AS count
diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml
index 109d8e0b89a6..7f2183dc2892 100644
--- a/datafusion/expr-common/Cargo.toml
+++ b/datafusion/expr-common/Cargo.toml
@@ -41,3 +41,4 @@ arrow = { workspace = true }
 datafusion-common = { workspace = true }
 itertools = { workspace = true }
 paste = "^1.0"
+sqlparser = { workspace = true }
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index 4b2f3b5e46b5..9e392c5dbd13 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -30,9 +30,11 @@ use arrow::datatypes::{
 };
 use datafusion_common::types::NativeType;
 use datafusion_common::{
-    exec_datafusion_err, exec_err, internal_err, plan_datafusion_err, plan_err, Result,
+    exec_datafusion_err, exec_err, internal_err, plan_datafusion_err, plan_err,
+    Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result, WithSpan,
 };
 use itertools::Itertools;
+use sqlparser::tokenizer::Span;
 
 /// The type signature of an instantiation of binary operator expression such as
 /// `lhs + rhs`
@@ -69,10 +71,19 @@ impl Signature {
 }
 
 /// Returns a [`Signature`] for applying `op` to arguments of type `lhs` and `rhs`
-fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature> {
+fn signature<'a>(
+    lhs: impl Into<WithSpan<&'a DataType>>,
+    op: impl Into<WithSpan<&'a Operator>>,
+    rhs: impl Into<WithSpan<&'a DataType>>,
+) -> Result<Signature> {
     use arrow::datatypes::DataType::*;
     use Operator::*;
-    match op {
+
+    let lhs: WithSpan<_> = lhs.into();
+    let op: WithSpan<_> = op.into();
+    let rhs: WithSpan<_> = rhs.into();
+
+    match *op {
         Eq |
         NotEq |
         Lt |
@@ -81,13 +92,13 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
         GtEq |
         IsDistinctFrom |
         IsNotDistinctFrom => {
-            comparison_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+            comparison_coercion(&lhs, &rhs).map(Signature::comparison).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common argument type for comparison operation {lhs} {op} {rhs}"
                 )
             })
         }
-        And | Or => if matches!((lhs, rhs), (Boolean | Null, Boolean | Null)) {
+        And | Or => if matches!((*lhs, *rhs), (Boolean | Null, Boolean | Null)) {
             // Logical binary boolean operators can only be evaluated for
             // boolean or null arguments.                   
             Ok(Signature::uniform(Boolean))
@@ -97,28 +108,28 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
             )
         }
         RegexMatch | RegexIMatch | RegexNotMatch | RegexNotIMatch => {
-            regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+            regex_coercion(lhs.as_ref(), rhs.as_ref()).map(Signature::comparison).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common argument type for regex operation {lhs} {op} {rhs}"
                 )
             })
         }
         LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => {
-            regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+            regex_coercion(lhs.as_ref(), rhs.as_ref()).map(Signature::comparison).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common argument type for regex operation {lhs} {op} {rhs}"
                 )
             })
         }
         BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight | BitwiseShiftLeft => {
-            bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
+            bitwise_coercion(lhs.as_ref(), rhs.as_ref()).map(Signature::uniform).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common type for bitwise operation {lhs} {op} {rhs}"
                 )
             })
         }
         StringConcat => {
-            string_concat_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
+            string_concat_coercion(lhs.as_ref(), rhs.as_ref()).map(Signature::uniform).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common string type for string concat operation {lhs} {op} {rhs}"
                 )
@@ -128,7 +139,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
             // ArrowAt and AtArrow check for whether one array is contained in another.
             // The result type is boolean. Signature::comparison defines this signature.
             // Operation has nothing to do with comparison
-            array_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
+            array_coercion(lhs.as_ref(), rhs.as_ref()).map(Signature::comparison).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Cannot infer common array type for arrow operation {lhs} {op} {rhs}"
                 )
@@ -140,7 +151,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
                 let l = new_empty_array(lhs);
                 let r = new_empty_array(rhs);
 
-                let result = match op {
+                let result = match *op {
                     Plus => add_wrapping(&l, &r),
                     Minus => sub_wrapping(&l, &r),
                     Multiply => mul_wrapping(&l, &r),
@@ -151,14 +162,14 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
                 result.map(|x| x.data_type().clone())
             };
 
-            if let Ok(ret) = get_result(lhs, rhs) {
+            if let Ok(ret) = get_result(lhs.as_ref(), rhs.as_ref()) {
                 // Temporal arithmetic, e.g. Date32 + Interval
                 Ok(Signature{
-                    lhs: lhs.clone(),
-                    rhs: rhs.clone(),
+                    lhs: lhs.into_inner().clone(),
+                    rhs: rhs.into_inner().clone(),
                     ret,
                 })
-            } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) {
+            } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs.as_ref(), rhs.as_ref()) {
                 // Temporal arithmetic by first coercing to a common time representation
                 // e.g. Date32 - Timestamp
                 let ret = get_result(&coerced, &coerced).map_err(|e| {
@@ -171,7 +182,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
                     rhs: coerced,
                     ret,
                 })
-            } else if let Some((lhs, rhs)) = math_decimal_coercion(lhs, rhs) {
+            } else if let Some((lhs, rhs)) = math_decimal_coercion(lhs.as_ref(), rhs.as_ref()) {
                 // Decimal arithmetic, e.g. Decimal(10, 2) + Decimal(10, 0)
                 let ret = get_result(&lhs, &rhs).map_err(|e| {
                     plan_datafusion_err!(
@@ -183,32 +194,56 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
                     rhs,
                     ret,
                 })
-            } else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) {
+            } else if let Some(numeric) = mathematics_numerical_coercion(lhs.as_ref(), rhs.as_ref()) {
                 // Numeric arithmetic, e.g. Int32 + Int32
                 Ok(Signature::uniform(numeric))
             } else {
                 plan_err!(
                     "Cannot coerce arithmetic expression {lhs} {op} {rhs} to valid types"
-                )
+                ).map_err(|err | {
+                    if lhs.span == Span::empty() || rhs.span == Span::empty() {
+                        err
+                    } else {
+                        err.with_diagnostic(Diagnostic {
+                            entries: vec![
+                                DiagnosticEntry {
+                                    kind: DiagnosticEntryKind::Error,
+                                    message: "Binary expression has invalid types".to_string(),
+                                    span: lhs.span.union(&rhs.span),
+                                },
+                                DiagnosticEntry {
+                                    kind: DiagnosticEntryKind::Note,
+                                    message: format!("Is of type {}", *lhs),
+                                    span: lhs.span,
+                                },
+                                DiagnosticEntry {
+                                    kind: DiagnosticEntryKind::Note,
+                                    message: format!("Is of type {}", *rhs),
+                                    span: rhs.span,
+                                }
+                            ]
+                        })
+                    }
+                })
             }
         }
     }
 }
 
 /// Returns the resulting type of a binary expression evaluating the `op` with the left and right hand types
-pub fn get_result_type(
-    lhs: &DataType,
-    op: &Operator,
-    rhs: &DataType,
+pub fn get_result_type<'a>(
+    lhs: impl Into<WithSpan<&'a DataType>>,
+    op: impl Into<WithSpan<&'a Operator>>,
+    rhs: impl Into<WithSpan<&'a DataType>>,
 ) -> Result<DataType> {
     signature(lhs, op, rhs).map(|sig| sig.ret)
 }
 
 /// Returns the coerced input types for a binary expression evaluating the `op` with the left and right hand types
-pub fn get_input_types(
-    lhs: &DataType,
-    op: &Operator,
-    rhs: &DataType,
+pub fn get_input_types<'a>(
+    lhs: impl Into<WithSpan<&'a DataType>>,
+    op: impl Into<WithSpan<&'a Operator>>,
+    rhs: impl Into<WithSpan<&'a DataType>>,
 ) -> Result<(DataType, DataType)> {
     signature(lhs, op, rhs).map(|sig| (sig.lhs, sig.rhs))
 }
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index d0e98327f34b..c5eaa4bf88e4 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -42,6 +42,7 @@ use sqlparser::ast::{
     display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
     NullTreatment, RenameSelectItem, ReplaceSelectElement,
 };
+use sqlparser::tokenizer::Span;
 
 /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
 ///
@@ -1113,7 +1114,7 @@ impl Expr {
     /// output schema. We can use this qualified name to reference the field.
     pub fn qualified_name(&self) -> (Option<TableReference>, String) {
         match self {
-            Expr::Column(Column { relation, name }) => (relation.clone(), name.clone()),
+            Expr::Column(Column { relation, name, span: _ }) => (relation.clone(), name.clone()),
             Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
             _ => (None, self.schema_name().to_string()),
         }
@@ -1665,6 +1666,13 @@ impl Expr {
             | Expr::Placeholder(..) => false,
         }
     }
+
+    pub fn get_span(&self) -> Option<Span> {
+        match self {
+            Expr::Column(Column{span, ..}) => Some(*span),
+            _ => None,
+        }
+    }
 }
 
 impl HashNode for Expr {
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index b944428977c4..c32f217c933b 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -160,6 +160,7 @@ pub fn unnormalize_col(expr: Expr) -> Expr {
                 let col = Column {
                     relation: None,
                     name: c.name,
+                    span: c.span,
                 };
                 Transformed::yes(Expr::Column(col))
             } else {
@@ -181,10 +182,10 @@ pub fn create_col_from_scalar_expr(
             Some::<TableReference>(subqry_alias.into()),
             name,
         )),
-        Expr::Column(Column { relation: _, name }) => Ok(Column::new(
+        Expr::Column(Column { relation: _, name, span }) => Ok(Column::new(
             Some::<TableReference>(subqry_alias.into()),
             name,
-        )),
+        ).with_span(*span)),
         _ => {
             let scalar_column = scalar_expr.schema_name().to_string();
             Ok(Column::new(
diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs
index f0d3d8fcd0c1..c49197f78eb9 100644
--- a/datafusion/expr/src/expr_rewriter/order_by.rs
+++ b/datafusion/expr/src/expr_rewriter/order_by.rs
@@ -23,6 +23,7 @@ use crate::{expr::Sort, Cast, Expr, LogicalPlan, TryCast};
 
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_common::{Column, Result};
+use sqlparser::tokenizer::Span;
 
 /// Rewrite sort on aggregate expressions to sort on the column of aggregate output
 /// For example, `max(x)` is written to `col("max(x)")`
@@ -101,6 +102,8 @@ fn rewrite_in_terms_of_projection(
         let search_col = Expr::Column(Column {
             relation: None,
             name,
+            // Span is not used in PartialEq
+            span: Span::empty(),
         });
 
         // look for the column named the same as this expr
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index b1a461eca41d..88dd74ded7d5 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -29,7 +29,7 @@ use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field};
 use datafusion_common::{
     not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result,
-    TableReference,
+    TableReference, WithSpan,
 };
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use recursive::recursive;
@@ -403,9 +403,26 @@ impl ExprSchemable for Expr {
                 ref right,
                 ref op,
             }) => {
-                let left = left.data_type_and_nullable(schema)?;
-                let right = right.data_type_and_nullable(schema)?;
-                Ok((get_result_type(&left.0, op, &right.0)?, left.1 || right.1))
+                let (left_type, left_is_nullable) =
+                    left.data_type_and_nullable(schema)?;
+                let left_type = if let Some(span) = left.get_span() {
+                    WithSpan::new(&left_type, span)
+                } else {
+                    (&left_type).into()
+                };
+
+                let (right_type, right_is_nullable) =
+                    right.data_type_and_nullable(schema)?;
+                let right_type = if let Some(span) = right.get_span() {
+                    WithSpan::new(&right_type, span)
+                } else {
+                    (&right_type).into()
+                };
+
+                Ok((
+                    get_result_type(left_type, op, right_type)?,
+                    left_is_nullable || right_is_nullable,
+                ))
             }
             Expr::WindowFunction(window_function) => {
                 self.data_type_and_nullable_with_window_function(schema, window_function)
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 90235e3f84c4..4a8c5d08e284 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1967,6 +1967,7 @@ mod tests {
     use crate::{col, expr, expr_fn::exists, in_subquery, lit, scalar_subquery};
 
     use datafusion_common::{RecursionUnnestOption, SchemaError};
+    use sqlparser::tokenizer::Span;
 
     #[test]
     fn plan_builder_simple() -> Result<()> {
@@ -2211,6 +2212,7 @@ mod tests {
                         Column {
                             relation: Some(TableReference::Bare { table }),
                             name,
+                            span: _,
                         },
                 },
                 _,
diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
index 9fbe54e1ccb9..1853f0aaf3ec 100644
--- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
+++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
@@ -123,21 +123,29 @@ fn expand_exprlist(input: &LogicalPlan, expr: Vec<Expr>) -> Result<Vec<Expr>> {
             Expr::Column(Column {
                 ref relation,
                 ref name,
+                span,
             }) => {
                 if name.eq("*") {
-                    if let Some(qualifier) = relation {
-                        projected_expr.extend(expand_qualified_wildcard(
+                    let expanded_columns = if let Some(qualifier) = relation {
+                        expand_qualified_wildcard(
                             qualifier,
                             input.schema(),
                             None,
-                        )?);
+                        )?
                     } else {
-                        projected_expr.extend(expand_wildcard(
+                        expand_wildcard(
                             input.schema(),
                             input,
                             None,
-                        )?);
-                    }
+                        )?
+                    };
+
+                    projected_expr.extend(expanded_columns.into_iter().map(|mut expr| {
+                        if let Expr::Column(c) = &mut expr {
+                            c.span = span;
+                        }
+                        expr
+                    }));
                 } else {
                     projected_expr.push(e.clone());
                 }
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 58e390ee8bdb..1bd4009b47cc 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -1000,6 +1000,7 @@ fn project_with_column_index(
             Expr::Column(Column {
                 relation: _,
                 ref name,
+                span: _,
             }) if name != schema.field(i).name() => e.alias(schema.field(i).name()),
             Expr::Alias { .. } | Expr::Column { .. } => e,
             _ => e.alias(schema.field(i).name()),
diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs
index b5726d999137..5ebd73799dd5 100644
--- a/datafusion/optimizer/src/decorrelate.rs
+++ b/datafusion/optimizer/src/decorrelate.rs
@@ -532,7 +532,7 @@ fn proj_exprs_evaluation_result_on_empty_batch(
             let result_expr = simplifier.simplify(result_expr)?;
             let expr_name = match expr {
                 Expr::Alias(Alias { name, .. }) => name.to_string(),
-                Expr::Column(Column { relation: _, name }) => name.to_string(),
+                Expr::Column(Column { relation: _, name, span: _ }) => name.to_string(),
                 _ => expr.schema_name().to_string(),
             };
             expr_result_map_for_count_bug.insert(expr_name, result_expr);
diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs
index 9adf14459081..d889d5e20152 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -46,6 +46,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 })?;
             Ok(Expr::ScalarVariable(ty, var_names))
         } else {
+            let id_span = id.span;
+
             // Don't use `col()` here because it will try to
             // interpret names with '.' as if they were
             // compound identifiers, but this is not a compound
@@ -59,6 +61,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 return Ok(Expr::Column(Column {
                     relation: qualifier.filter(|q| q.table() != UNNAMED_TABLE).cloned(),
                     name: normalize_ident,
+                    span: id_span,
                 }));
             }
 
@@ -79,6 +82,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             Ok(Expr::Column(Column {
                 relation: None,
                 name: normalize_ident,
+                span: id_span,
             }))
         }
     }
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index fb83ecea88b8..68282782575c 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -283,10 +283,10 @@ impl<'a> DFParser<'a> {
         dialect: &'a dyn Dialect,
     ) -> Result<Self, ParserError> {
         let mut tokenizer = Tokenizer::new(dialect, sql);
-        let tokens = tokenizer.tokenize()?;
+        let tokens = tokenizer.tokenize_with_location()?;
 
         Ok(DFParser {
-            parser: Parser::new(dialect).with_tokens(tokens),
+            parser: Parser::new(dialect).with_tokens_with_locations(tokens),
         })
     }
 
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index 88b2e79558ca..3ddec1a87082 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -1695,6 +1695,7 @@ mod tests {
                 Expr::Column(Column {
                     relation: Some(TableReference::partial("a", "b")),
                     name: "c".to_string(),
+                    span: Span::empty(),
                 })
                 .gt(lit(4)),
                 r#"(b.c > 4)"#,
@@ -2032,6 +2033,7 @@ mod tests {
                     expr: Box::new(Expr::Column(Column {
                         relation: Some(TableReference::partial("schema", "table")),
                         name: "array_col".to_string(),
+                        span: Span::empty(),
                     })),
                 }),
                 r#"UNNEST("table".array_col)"#,
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 1a1d4c42b0ff..5f015c6465fc 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -911,10 +911,12 @@ impl Unparser<'_> {
                     Expr::Column(Column {
                         relation: _,
                         name: left_name,
+                        span: _,
                     }),
                     Expr::Column(Column {
                         relation: _,
                         name: right_name,
+                        span: _,
                     }),
                 ) if left_name == right_name => {
                     idents.push(self.new_ident_quoted_if_needs(left_name.to_string()));

From dee7959dbaa6d233092e0d80c40ebedb40317a06 Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Fri, 6 Dec 2024 14:36:25 +0100
Subject: [PATCH 4/8] feat: diagnostics for unknown columns and missing in
 `GROUP BY`

---
 datafusion/common/src/diagnostic.rs           | 39 ++++++++++++-
 datafusion/common/src/error.rs                | 10 +++-
 .../expr-common/src/type_coercion/binary.rs   |  2 +-
 datafusion/sql/src/expr/identifier.rs         | 20 +++++--
 datafusion/sql/src/planner.rs                 | 25 ++++++++-
 datafusion/sql/src/select.rs                  | 21 +++++--
 datafusion/sql/src/utils.rs                   | 55 +++++++++++++++----
 7 files changed, 145 insertions(+), 27 deletions(-)

diff --git a/datafusion/common/src/diagnostic.rs b/datafusion/common/src/diagnostic.rs
index 1ebf3f9ed566..130cbee0d9a1 100644
--- a/datafusion/common/src/diagnostic.rs
+++ b/datafusion/common/src/diagnostic.rs
@@ -12,10 +12,47 @@ pub struct DiagnosticEntry {
     pub kind: DiagnosticEntryKind,
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum DiagnosticEntryKind {
     Error,
     Warning,
     Note,
     Help,
 }
+
+impl Diagnostic {
+    pub fn new(entries: impl IntoIterator<Item = DiagnosticEntry>) -> Self {
+        Diagnostic {
+            entries: entries.into_iter().collect(),
+        }
+    }
+}
+
+impl FromIterator<DiagnosticEntry> for Diagnostic {
+    fn from_iter<T: IntoIterator<Item = DiagnosticEntry>>(iter: T) -> Self {
+        Diagnostic {
+            entries: iter.into_iter().collect(),
+        }
+    }
+}
+
+impl DiagnosticEntry {
+    pub fn new(
+        message: impl Into<String>,
+        kind: DiagnosticEntryKind,
+        span: Span,
+    ) -> Self {
+        DiagnosticEntry {
+            span,
+            message: message.into(),
+            kind,
+        }
+    }
+
+    pub fn new_without_span(
+        message: impl Into<String>,
+        kind: DiagnosticEntryKind,
+    ) -> Self {
+        Self::new(message, kind, Span::empty())
+    }
+}
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 39916e377aae..f1a816ceac29 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -28,7 +28,7 @@ use std::sync::Arc;
 
 use crate::diagnostic::Diagnostic;
 use crate::utils::quote_identifier;
-use crate::{Column, DFSchema, TableReference};
+use crate::{Column, DFSchema, DiagnosticEntry, DiagnosticEntryKind, TableReference};
 #[cfg(feature = "avro")]
 use apache_avro::Error as AvroError;
 use arrow::error::ArrowError;
@@ -376,8 +376,12 @@ impl DataFusionError {
     }
 
     /// wraps self in Self::Diagnostic with a [`Diagnostic`]
-    pub fn with_diagnostic(self, diag: Diagnostic) -> Self {
-        Self::Diagnostic(diag, Box::new(self))
+    pub fn with_diagnostic<F: FnOnce(&DataFusionError) -> Diagnostic>(
+        self,
+        f: F,
+    ) -> Self {
+        let diagnostic = f(&self);
+        Self::Diagnostic(diagnostic, Box::new(self))
     }
 
     /// Strips backtrace out of the error message
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index 9e392c5dbd13..954807eae226 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -204,7 +204,7 @@ fn signature<'a>(
                     if lhs.span == Span::empty() || rhs.span == Span::empty() {
                         err
                     } else {
-                        err.with_diagnostic(Diagnostic {
+                        err.with_diagnostic(|_| Diagnostic {
                             entries: vec![
                                 DiagnosticEntry {
                                     kind: DiagnosticEntryKind::Error,
diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs
index d889d5e20152..14c7bb8f51c1 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -20,10 +20,12 @@ use sqlparser::ast::{Expr as SQLExpr, Ident};
 
 use datafusion_common::{
     internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema,
-    DataFusionError, Result, TableReference,
+    DataFusionError, Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result,
+    TableReference,
 };
 use datafusion_expr::planner::PlannerResult;
 use datafusion_expr::{Case, Expr};
+use sqlparser::tokenizer::Span;
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use datafusion_expr::UNNAMED_TABLE;
@@ -112,6 +114,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 })?;
             Ok(Expr::ScalarVariable(ty, var_names))
         } else {
+            let span = ids.last().map(|id| id.span).unwrap_or(Span::empty());
             let ids = ids
                 .into_iter()
                 .map(|id| self.ident_normalizer.normalize(id))
@@ -139,9 +142,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                     plan_err!("could not parse compound identifier from {ids:?}")
                 }
                 // Found matching field with no spare identifier(s)
-                Some((field, qualifier, _nested_names)) => {
-                    Ok(Expr::Column(Column::from((qualifier, field))))
-                }
+                Some((field, qualifier, _nested_names)) => Ok(Expr::Column(
+                    Column::from((qualifier, field)).with_span(span),
+                )),
                 None => {
                     // Return default where use all identifiers to not have a nested field
                     // this len check is because at 5 identifiers will have to have a nested field
@@ -176,14 +179,19 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                                     // safe unwrap as s can never be empty or exceed the bounds
                                     let (relation, column_name) =
                                         form_identifier(s).unwrap();
-                                    Ok(Expr::Column(Column::new(relation, column_name)))
+                                    Ok(Expr::Column(
+                                        Column::new(relation, column_name)
+                                            .with_span(span),
+                                    ))
                                 }
                             }
                         } else {
                             let s = &ids[0..ids.len()];
                             // Safe unwrap as s can never be empty or exceed the bounds
                             let (relation, column_name) = form_identifier(s).unwrap();
-                            Ok(Expr::Column(Column::new(relation, column_name)))
+                            Ok(Expr::Column(
+                                Column::new(relation, column_name).with_span(span),
+                            ))
                         }
                     }
                 }
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 6ef2eb4e7fb2..0b1481544d05 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -22,7 +22,8 @@ use std::vec;
 
 use arrow_schema::*;
 use datafusion_common::{
-    field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError,
+    field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, Diagnostic,
+    DiagnosticEntry, DiagnosticEntryKind, SchemaError,
 };
 use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo};
 use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption};
@@ -383,7 +384,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             .try_for_each(|col| match col {
                 Expr::Column(col) => match &col.relation {
                     Some(r) => {
-                        schema.field_with_qualified_name(r, &col.name)?;
+                        schema.field_with_qualified_name(r, &col.name).map_err(
+                            |err| {
+                                err.with_diagnostic(|_| {
+                                    Diagnostic::new([DiagnosticEntry::new(
+                                        format!(
+                                            "No field named '{}' in relation '{}'",
+                                            col.name, r
+                                        ),
+                                        DiagnosticEntryKind::Error,
+                                        col.span,
+                                    )])
+                                })
+                            },
+                        )?;
                         Ok(())
                     }
                     None => {
@@ -396,6 +410,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 }
                 .map_err(|_: DataFusionError| {
                     field_not_found(col.relation.clone(), col.name.as_str(), schema)
+                        .with_diagnostic(|_| {
+                            Diagnostic::new([DiagnosticEntry::new(
+                                format!("No field named '{}'", col.name),
+                                DiagnosticEntryKind::Error,
+                                col.span,
+                            )])
+                        })
                 }),
                 _ => internal_err!("Not a column"),
             })
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 9899d92bde71..6d1b3f185ae1 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -22,10 +22,13 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use crate::utils::{
     check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs,
     resolve_columns, resolve_positions_to_exprs, rewrite_recursive_unnests_bottom_up,
+    CheckColumnsSatisfyExprsPurpose,
 };
 
 use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
-use datafusion_common::{not_impl_err, plan_err, Result};
+use datafusion_common::{
+    not_impl_err, plan_err, Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result,
+};
 use datafusion_common::{RecursionUnnestOption, UnnestOptions};
 use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions};
 use datafusion_expr::expr_rewriter::{
@@ -44,6 +47,7 @@ use sqlparser::ast::{
     WildcardAdditionalOptions, WindowType,
 };
 use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins};
+use sqlparser::tokenizer::Span;
 
 impl<S: ContextProvider> SqlToRel<'_, S> {
     /// Generate a logic plan from an SQL select
@@ -803,8 +807,17 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         check_columns_satisfy_exprs(
             &column_exprs_post_aggr,
             &select_exprs_post_aggr,
-            "Projection references non-aggregate values",
-        )?;
+            CheckColumnsSatisfyExprsPurpose::GroupBy,
+        )
+        .map_err(|err| {
+            err.with_diagnostic(|_| {
+                Diagnostic::new([DiagnosticEntry::new(
+                    "GROUP BY clause is here",
+                    DiagnosticEntryKind::Note,
+                    Span::union_iter(group_by_exprs.iter().filter_map(|e| e.get_span())),
+                )])
+            })
+        })?;
 
         // Rewrite the HAVING expression to use the columns produced by the
         // aggregation.
@@ -815,7 +828,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             check_columns_satisfy_exprs(
                 &column_exprs_post_aggr,
                 std::slice::from_ref(&having_expr_post_aggr),
-                "HAVING clause references non-aggregate values",
+                CheckColumnsSatisfyExprsPurpose::Having,
             )?;
 
             Some(having_expr_post_aggr)
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index e29a2299b41c..b2133d463f87 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -26,8 +26,8 @@ use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
 };
 use datafusion_common::{
-    exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, HashMap,
-    Result, ScalarValue,
+    exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, Diagnostic,
+    DiagnosticEntry, DiagnosticEntryKind, HashMap, Result, ScalarValue,
 };
 use datafusion_expr::builder::get_struct_unnested_columns;
 use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction};
@@ -37,6 +37,7 @@ use datafusion_expr::{
 };
 use indexmap::IndexMap;
 use sqlparser::ast::{Ident, Value};
+use sqlparser::tokenizer::Span;
 
 /// Make a best-effort attempt at resolving all columns in the expression tree
 pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
@@ -89,12 +90,29 @@ pub(crate) fn rebase_expr(
         .data()
 }
 
+/// Why [`check_columns_satisfy_exprs`] is called. Helps build better error
+/// messages.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CheckColumnsSatisfyExprsPurpose {
+    GroupBy,
+    Having,
+}
+
+impl CheckColumnsSatisfyExprsPurpose {
+    fn get_error_message(&self) -> &'static str {
+        match self {
+            CheckColumnsSatisfyExprsPurpose::GroupBy => "Projection references non-aggregate values",
+            CheckColumnsSatisfyExprsPurpose::Having => "HAVING clause references non-aggregate values",
+        }
+    }
+}
+
 /// Determines if the set of `Expr`'s are a valid projection on the input
 /// `Expr::Column`'s.
 pub(crate) fn check_columns_satisfy_exprs(
     columns: &[Expr],
     exprs: &[Expr],
-    message_prefix: &str,
+    call_purpose: CheckColumnsSatisfyExprsPurpose,
 ) -> Result<()> {
     columns.iter().try_for_each(|c| match c {
         Expr::Column(_) => Ok(()),
@@ -105,22 +123,22 @@ pub(crate) fn check_columns_satisfy_exprs(
         match e {
             Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
                 for e in exprs {
-                    check_column_satisfies_expr(columns, e, message_prefix)?;
+                    check_column_satisfies_expr(columns, e, call_purpose)?;
                 }
             }
             Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
                 for e in exprs {
-                    check_column_satisfies_expr(columns, e, message_prefix)?;
+                    check_column_satisfies_expr(columns, e, call_purpose)?;
                 }
             }
             Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
                 for exprs in lists_of_exprs {
                     for e in exprs {
-                        check_column_satisfies_expr(columns, e, message_prefix)?;
+                        check_column_satisfies_expr(columns, e, call_purpose)?;
                     }
                 }
             }
-            _ => check_column_satisfies_expr(columns, e, message_prefix)?,
+            _ => check_column_satisfies_expr(columns, e, call_purpose)?,
         }
     }
     Ok(())
@@ -129,15 +147,32 @@ pub(crate) fn check_columns_satisfy_exprs(
 fn check_column_satisfies_expr(
     columns: &[Expr],
     expr: &Expr,
-    message_prefix: &str,
+    call_purpose: CheckColumnsSatisfyExprsPurpose,
 ) -> Result<()> {
     if !columns.contains(expr) {
         return plan_err!(
             "{}: Expression {} could not be resolved from available columns: {}",
-            message_prefix,
+            call_purpose.get_error_message(),
             expr,
             expr_vec_fmt!(columns)
-        );
+        )
+        .map_err(|err| {
+            let message = match call_purpose {
+                CheckColumnsSatisfyExprsPurpose::GroupBy => format!("'{}' in projection does not appear in GROUP BY clause", expr.to_string()),
+                CheckColumnsSatisfyExprsPurpose::Having => format!("'{}' in HAVING clause does not appear in GROUP BY clause", expr.to_string()),
+            };
+            err.with_diagnostic(|_| {
+                Diagnostic::new([DiagnosticEntry::new(
+                    message,
+                    DiagnosticEntryKind::Error,
+                    expr.get_span().unwrap_or(Span::empty()),
+                ),DiagnosticEntry::new(
+                    format!("Add '{}' to GROUP BY clause", expr.to_string()),
+                    DiagnosticEntryKind::Help,
+                    Span::empty(),
+                )])
+            })
+        });
     }
     Ok(())
 }

From 515bb5e08453f0a69308f1f149d791f117d33db7 Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Mon, 9 Dec 2024 15:07:38 +0100
Subject: [PATCH 5/8] feat: `FieldSpans` and ambiguous column reference
 diagnostics

---
 datafusion/common/src/column.rs               |  62 ++++++++--
 .../common/src/dfschema/fields_spans.rs       |  42 +++++++
 .../src/{dfschema.rs => dfschema/mod.rs}      | 112 +++++++++++++-----
 datafusion/common/src/lib.rs                  |   4 +-
 datafusion/common/src/with_span.rs            |  69 +++++------
 datafusion/core/src/dataframe/mod.rs          |   4 +-
 datafusion/core/src/physical_planner.rs       |   8 +-
 .../expr-common/src/type_coercion/binary.rs   |  38 +++---
 datafusion/expr/src/expr.rs                   |  21 +++-
 datafusion/expr/src/expr_rewriter/mod.rs      |  18 ++-
 datafusion/expr/src/expr_rewriter/order_by.rs |   2 +-
 datafusion/expr/src/expr_schema.rs            |  10 +-
 datafusion/expr/src/logical_plan/builder.rs   |  22 ++--
 datafusion/expr/src/logical_plan/plan.rs      |  14 ++-
 datafusion/expr/src/utils.rs                  |   5 +-
 .../src/analyzer/expand_wildcard_rule.rs      |   4 +-
 .../optimizer/src/analyzer/type_coercion.rs   |   6 +-
 .../optimizer/src/common_subexpr_eliminate.rs |   7 +-
 datafusion/optimizer/src/decorrelate.rs       |   2 +-
 datafusion/optimizer/src/push_down_filter.rs  |   8 +-
 .../src/replace_distinct_aggregate.rs         |   2 +-
 datafusion/sql/src/expr/identifier.rs         |   4 +-
 datafusion/sql/src/expr/mod.rs                |   4 +-
 datafusion/sql/src/planner.rs                 |   5 +-
 datafusion/sql/src/select.rs                  |   6 +-
 datafusion/sql/src/statement.rs               |   2 +-
 datafusion/sql/src/unparser/plan.rs           |   4 +-
 datafusion/sql/src/utils.rs                   |  41 +++++--
 28 files changed, 353 insertions(+), 173 deletions(-)
 create mode 100644 datafusion/common/src/dfschema/fields_spans.rs
 rename datafusion/common/src/{dfschema.rs => dfschema/mod.rs} (94%)

diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index fe29a24147c3..0ddd1f0a4112 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -22,7 +22,10 @@ use sqlparser::tokenizer::Span;
 
 use crate::error::_schema_err;
 use crate::utils::{parse_identifiers_normalized, quote_identifier};
-use crate::{DFSchema, Result, SchemaError, TableReference};
+use crate::{
+    DFSchema, Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result, SchemaError,
+    TableReference,
+};
 use derivative::Derivative;
 use std::collections::HashSet;
 use std::convert::Infallible;
@@ -46,7 +49,7 @@ pub struct Column {
     /// The location in the source code where this column originally came from.
     /// Does not change as the [`Column`] undergoes normalization and other
     /// transformations.
-    pub span: Span,
+    pub spans: Vec<Span>,
 }
 
 impl Column {
@@ -63,7 +66,7 @@ impl Column {
         Self {
             relation: relation.map(|r| r.into()),
             name: name.into(),
-            span: Span::empty(),
+            spans: vec![],
         }
     }
 
@@ -72,7 +75,7 @@ impl Column {
         Self {
             relation: None,
             name: name.into(),
-            span: Span::empty(),
+            spans: vec![],
         }
     }
 
@@ -83,7 +86,7 @@ impl Column {
         Self {
             relation: None,
             name: name.into(),
-            span: Span::empty(),
+            spans: vec![],
         }
     }
 
@@ -118,7 +121,7 @@ impl Column {
         Some(Self {
             relation,
             name,
-            span: Span::empty(),
+            spans: vec![],
         })
     }
 
@@ -133,7 +136,7 @@ impl Column {
             Self {
                 relation: None,
                 name: flat_name,
-                span: Span::empty(),
+                spans: vec![],
             },
         )
     }
@@ -145,7 +148,7 @@ impl Column {
             Self {
                 relation: None,
                 name: flat_name,
-                span: Span::empty(),
+                spans: vec![],
             },
         )
     }
@@ -251,7 +254,8 @@ impl Column {
                         .flat_map(|s| s.columns_with_unqualified_name(&self.name))
                         .collect::<Vec<_>>();
                     for using_col in using_columns {
-                        let all_matched = columns.iter().all(|c| using_col.contains(c));
+                        let all_matched =
+                            columns.iter().all(|c| using_col.contains(c));
                         // All matched fields belong to the same using column set, in orther words
                         // the same join clause. We simply pick the qualifier from the first match.
                         if all_matched {
@@ -261,7 +265,33 @@ impl Column {
 
                     // If not due to USING columns then due to ambiguous column name
                     return _schema_err!(SchemaError::AmbiguousReference {
-                        field: Column::new_unqualified(self.name),
+                        field: Column::new_unqualified(&self.name),
+                    })
+                    .map_err(|err| {
+                        err.with_diagnostic(|_| {
+                            Diagnostic::new(
+                                [DiagnosticEntry::new(
+                                    format!("Ambiguous reference to '{}'", &self.name),
+                                    DiagnosticEntryKind::Error,
+                                    *self.spans.first().unwrap_or(&Span::empty()),
+                                )]
+                                .into_iter()
+                                .chain(
+                                    columns.iter().flat_map(|c| {
+                                        c.spans.iter().map(|span| {
+                                            DiagnosticEntry::new(
+                                                format!(
+                                                    "Possible reference to '{}' here",
+                                                    &c.name
+                                                ),
+                                                DiagnosticEntryKind::Note,
+                                                *span,
+                                            )
+                                        })
+                                    }),
+                                ),
+                            )
+                        })
                     });
                 }
             }
@@ -280,7 +310,17 @@ impl Column {
     /// Attaches a [`Span`] to the [`Column`], i.e. its location in the source
     /// SQL query.
     pub fn with_span(mut self, span: Span) -> Self {
-        self.span = span;
+        self.spans = vec![span];
+        self
+    }
+
+    /// Attaches multiple [`Span`]s to the [`Column`], i.e. its locations in the
+    /// source SQL query.
+    pub fn with_spans<IT>(mut self, spans: IT) -> Self
+    where
+        IT: IntoIterator<Item = Span>,
+    {
+        self.spans = spans.into_iter().collect();
         self
     }
 }
diff --git a/datafusion/common/src/dfschema/fields_spans.rs b/datafusion/common/src/dfschema/fields_spans.rs
new file mode 100644
index 000000000000..bbfa9f2783a7
--- /dev/null
+++ b/datafusion/common/src/dfschema/fields_spans.rs
@@ -0,0 +1,42 @@
+use sqlparser::tokenizer::Span;
+
+use crate::JoinType;
+
+/// The location in the source code where the fields are defined (e.g. in the
+/// body of a CTE), if any.
+#[derive(Debug, Clone)]
+pub struct FieldsSpans(Vec<Vec<Span>>);
+
+impl FieldsSpans {
+    pub fn empty(field_count: usize) -> Self {
+        Self((0..field_count).into_iter().map(|_| Vec::new()).collect())
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = &Vec<Span>> {
+        self.0.iter()
+    }
+
+    pub fn join(
+        &self,
+        other: &FieldsSpans,
+        join_type: &JoinType,
+        left_cols_len: usize,
+    ) -> FieldsSpans {
+        match join_type {
+            JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
+                FieldsSpans(self.0.iter().chain(other.0.iter()).cloned().collect())
+            }
+            JoinType::LeftSemi => todo!(),
+            JoinType::RightSemi => todo!(),
+            JoinType::LeftAnti => todo!(),
+            JoinType::RightAnti => todo!(),
+            JoinType::LeftMark => todo!(),
+        }
+    }
+}
+
+impl FromIterator<Vec<Span>> for FieldsSpans {
+    fn from_iter<T: IntoIterator<Item = Vec<Span>>>(iter: T) -> Self {
+        FieldsSpans(iter.into_iter().collect())
+    }
+}
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema/mod.rs
similarity index 94%
rename from datafusion/common/src/dfschema.rs
rename to datafusion/common/src/dfschema/mod.rs
index b53b475edaa0..f326cded1ac1 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema/mod.rs
@@ -17,7 +17,10 @@
 
 //! DFSchema is an extended schema struct that DataFusion uses to provide support for
 //! fields with optional relation names.
+mod fields_spans;
+pub use fields_spans::FieldsSpans;
 
+use std::backtrace::Backtrace;
 use std::collections::{BTreeSet, HashMap, HashSet};
 use std::fmt::{Display, Formatter};
 use std::hash::Hash;
@@ -28,10 +31,10 @@ use crate::{
     field_not_found, unqualified_field_not_found, Column, FunctionalDependencies,
     SchemaError, TableReference,
 };
-
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef};
 use arrow_schema::SchemaBuilder;
+use derivative::Derivative;
 use sqlparser::tokenizer::Span;
 
 /// A reference-counted reference to a [DFSchema].
@@ -105,7 +108,8 @@ pub type DFSchemaRef = Arc<DFSchema>;
 /// let schema = Schema::from(df_schema);
 /// assert_eq!(schema.fields().len(), 1);
 /// ```
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, Derivative)]
+#[derivative(PartialEq, Eq)]
 pub struct DFSchema {
     /// Inner Arrow schema reference.
     inner: SchemaRef,
@@ -114,6 +118,10 @@ pub struct DFSchema {
     field_qualifiers: Vec<Option<TableReference>>,
     /// Stores functional dependencies in the schema.
     functional_dependencies: FunctionalDependencies,
+    /// The location in the source code where the fields are defined (e.g. in
+    /// the body of a CTE), if any.
+    #[derivative(PartialEq = "ignore")]
+    fields_spans: FieldsSpans,
 }
 
 impl DFSchema {
@@ -123,6 +131,7 @@ impl DFSchema {
             inner: Arc::new(Schema::new([])),
             field_qualifiers: vec![],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(0),
         }
     }
 
@@ -148,12 +157,15 @@ impl DFSchema {
         let (qualifiers, fields): (Vec<Option<TableReference>>, Vec<Arc<Field>>) =
             qualified_fields.into_iter().unzip();
 
+        let field_count = fields.len();
+
         let schema = Arc::new(Schema::new_with_metadata(fields, metadata));
 
         let dfschema = Self {
             inner: schema,
             field_qualifiers: qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(field_count),
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -183,6 +195,7 @@ impl DFSchema {
             inner: schema,
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(field_count),
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -201,6 +214,7 @@ impl DFSchema {
             inner: schema.clone().into(),
             field_qualifiers: vec![Some(qualifier); schema.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(schema.fields.len()),
         };
         schema.check_names()?;
         Ok(schema)
@@ -211,10 +225,12 @@ impl DFSchema {
         qualifiers: Vec<Option<TableReference>>,
         schema: &SchemaRef,
     ) -> Result<Self> {
+        let field_count = schema.fields.len();
         let dfschema = Self {
             inner: Arc::clone(schema),
             field_qualifiers: qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(field_count),
         };
         dfschema.check_names()?;
         Ok(dfschema)
@@ -266,6 +282,11 @@ impl DFSchema {
         }
     }
 
+    pub fn with_fields_spans(mut self, fields_spans: FieldsSpans) -> Self {
+        self.fields_spans = fields_spans;
+        self
+    }
+
     /// Create a new schema that contains the fields from this schema followed by the fields
     /// from the supplied schema. An error will be returned if there are duplicate field names.
     pub fn join(&self, schema: &DFSchema) -> Result<Self> {
@@ -277,6 +298,7 @@ impl DFSchema {
         let mut new_metadata = self.inner.metadata.clone();
         new_metadata.extend(schema.inner.metadata.clone());
         let new_schema_with_metadata = new_schema.with_metadata(new_metadata);
+        let new_field_count = new_schema_with_metadata.fields.len();
 
         let mut new_qualifiers = self.field_qualifiers.clone();
         new_qualifiers.extend_from_slice(schema.field_qualifiers.as_slice());
@@ -285,6 +307,7 @@ impl DFSchema {
             inner: Arc::new(new_schema_with_metadata),
             field_qualifiers: new_qualifiers,
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(new_field_count),
         };
         new_self.check_names()?;
         Ok(new_self)
@@ -297,8 +320,10 @@ impl DFSchema {
             return;
         }
 
-        let self_fields: HashSet<(Option<&TableReference>, &FieldRef)> =
-            self.iter().collect();
+        let self_fields: HashSet<(Option<&TableReference>, &FieldRef)> = self
+            .iter()
+            .map(|(qualifier, field, _)| (qualifier, field))
+            .collect();
         let self_unqualified_names: HashSet<&str> = self
             .inner
             .fields
@@ -308,7 +333,7 @@ impl DFSchema {
 
         let mut schema_builder = SchemaBuilder::from(self.inner.fields.clone());
         let mut qualifiers = Vec::new();
-        for (qualifier, field) in other_schema.iter() {
+        for (qualifier, field, _) in other_schema.iter() {
             // skip duplicate columns
             let duplicated_field = match qualifier {
                 Some(q) => self_fields.contains(&(Some(q), field)),
@@ -354,7 +379,7 @@ impl DFSchema {
         let mut matches = self
             .iter()
             .enumerate()
-            .filter(|(_, (q, f))| match (qualifier, q) {
+            .filter(|(_, (q, f, _))| match (qualifier, q) {
                 // field to lookup is qualified.
                 // current field is qualified and not shared between relations, compare both
                 // qualifier and name.
@@ -425,8 +450,8 @@ impl DFSchema {
     /// Find all fields having the given qualifier
     pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&Field> {
         self.iter()
-            .filter(|(q, _)| q.map(|q| q.eq(qualifier)).unwrap_or(false))
-            .map(|(_, f)| f.as_ref())
+            .filter(|(q, _, _)| q.map(|q| q.eq(qualifier)).unwrap_or(false))
+            .map(|(_, f, _)| f.as_ref())
             .collect()
     }
 
@@ -437,7 +462,7 @@ impl DFSchema {
     ) -> Vec<usize> {
         self.iter()
             .enumerate()
-            .filter_map(|(idx, (q, _))| q.and_then(|q| q.eq(qualifier).then_some(idx)))
+            .filter_map(|(idx, (q, _, _))| q.and_then(|q| q.eq(qualifier).then_some(idx)))
             .collect()
     }
 
@@ -456,23 +481,26 @@ impl DFSchema {
         name: &str,
     ) -> Vec<(Option<&TableReference>, &Field)> {
         self.iter()
-            .filter(|(_, field)| field.name() == name)
-            .map(|(qualifier, field)| (qualifier, field.as_ref()))
+            .filter(|(_, field, _)| field.name() == name)
+            .map(|(qualifier, field, _)| (qualifier, field.as_ref()))
             .collect()
     }
 
     /// Find all fields that match the given name and convert to column
     pub fn columns_with_unqualified_name(&self, name: &str) -> Vec<Column> {
         self.iter()
-            .filter(|(_, field)| field.name() == name)
-            .map(|(qualifier, field)| Column::new(qualifier.cloned(), field.name()))
+            .filter(|(_, field, _)| field.name() == name)
+            .map(|(qualifier, field, spans)| {
+                Column::new(qualifier.cloned(), field.name())
+                    .with_spans(spans.iter().copied())
+            })
             .collect()
     }
 
     /// Return all `Column`s for the schema
     pub fn columns(&self) -> Vec<Column> {
         self.iter()
-            .map(|(qualifier, field)| {
+            .map(|(qualifier, field, _)| {
                 Column::new(qualifier.cloned(), field.name().clone())
             })
             .collect()
@@ -506,7 +534,7 @@ impl DFSchema {
                         field: Column {
                             relation: None,
                             name: name.to_string(),
-                            span: Span::empty(),
+                            spans: vec![],
                         },
                     })
                 }
@@ -560,8 +588,9 @@ impl DFSchema {
         qualifier: &TableReference,
         name: &str,
     ) -> bool {
-        self.iter()
-            .any(|(q, f)| q.map(|q| q.eq(qualifier)).unwrap_or(false) && f.name() == name)
+        self.iter().any(|(q, f, _)| {
+            q.map(|q| q.eq(qualifier)).unwrap_or(false) && f.name() == name
+        })
     }
 
     /// Find if the field exists with the given qualified column
@@ -615,11 +644,13 @@ impl DFSchema {
         }
         let self_fields = self.iter();
         let other_fields = other.iter();
-        self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| {
-            q1 == q2
-                && f1.name() == f2.name()
-                && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type())
-        })
+        self_fields
+            .zip(other_fields)
+            .all(|((q1, f1, _), (q2, f2, _))| {
+                q1 == q2
+                    && f1.name() == f2.name()
+                    && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type())
+            })
     }
 
     /// Returns true if the two schemas have the same qualified named
@@ -637,11 +668,16 @@ impl DFSchema {
         }
         let self_fields = self.iter();
         let other_fields = other.iter();
-        self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| {
-            q1 == q2
-                && f1.name() == f2.name()
-                && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type())
-        })
+        self_fields
+            .zip(other_fields)
+            .all(|((q1, f1, _), (q2, f2, _))| {
+                q1 == q2
+                    && f1.name() == f2.name()
+                    && Self::datatype_is_semantically_equal(
+                        f1.data_type(),
+                        f2.data_type(),
+                    )
+            })
     }
 
     /// Checks if two [`DataType`]s are logically equal. This is a notably weaker constraint
@@ -781,6 +817,7 @@ impl DFSchema {
             field_qualifiers: vec![None; self.inner.fields.len()],
             inner: self.inner,
             functional_dependencies: self.functional_dependencies,
+            fields_spans: self.fields_spans,
         }
     }
 
@@ -791,13 +828,14 @@ impl DFSchema {
             field_qualifiers: vec![Some(qualifier); self.inner.fields.len()],
             inner: self.inner,
             functional_dependencies: self.functional_dependencies,
+            fields_spans: self.fields_spans,
         }
     }
 
     /// Get list of fully-qualified field names in this schema
     pub fn field_names(&self) -> Vec<String> {
         self.iter()
-            .map(|(qualifier, field)| qualified_name(qualifier, field.name()))
+            .map(|(qualifier, field, _)| qualified_name(qualifier, field.name()))
             .collect::<Vec<_>>()
     }
 
@@ -811,12 +849,20 @@ impl DFSchema {
         &self.functional_dependencies
     }
 
+    /// Get fields spans
+    pub fn fields_spans(&self) -> &FieldsSpans {
+        &self.fields_spans
+    }
+
     /// Iterate over the qualifiers and fields in the DFSchema
-    pub fn iter(&self) -> impl Iterator<Item = (Option<&TableReference>, &FieldRef)> {
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = (Option<&TableReference>, &FieldRef, &Vec<Span>)> {
         self.field_qualifiers
             .iter()
             .zip(self.inner.fields().iter())
-            .map(|(qualifier, field)| (qualifier.as_ref(), field))
+            .zip(self.fields_spans.iter())
+            .map(|((qualifier, field), span)| (qualifier.as_ref(), field, span))
     }
 }
 
@@ -867,6 +913,7 @@ impl TryFrom<SchemaRef> for DFSchema {
             inner: schema,
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(field_count),
         };
         Ok(dfschema)
     }
@@ -923,6 +970,7 @@ impl ToDFSchema for Vec<Field> {
             inner: schema.into(),
             field_qualifiers: vec![None; field_count],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(field_count),
         };
         Ok(dfschema)
     }
@@ -934,7 +982,7 @@ impl Display for DFSchema {
             f,
             "fields:[{}], metadata:{:?}",
             self.iter()
-                .map(|(q, f)| qualified_name(q, f.name()))
+                .map(|(q, f, _)| qualified_name(q, f.name()))
                 .collect::<Vec<String>>()
                 .join(", "),
             self.inner.metadata
@@ -1284,6 +1332,7 @@ mod tests {
             inner: Arc::clone(&arrow_schema_ref),
             field_qualifiers: vec![None; arrow_schema_ref.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(arrow_schema_ref.fields.len()),
         };
         let df_schema_ref = Arc::new(df_schema.clone());
 
@@ -1330,6 +1379,7 @@ mod tests {
             inner: Arc::clone(&schema),
             field_qualifiers: vec![None; schema.fields.len()],
             functional_dependencies: FunctionalDependencies::empty(),
+            fields_spans: FieldsSpans::empty(schema.fields.len()),
         };
 
         assert_eq!(df_schema.inner.metadata(), schema.metadata())
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index 8eed7e18005a..72ce99f78d00 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -54,7 +54,7 @@ pub mod with_span;
 pub use arrow;
 pub use column::Column;
 pub use dfschema::{
-    qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema,
+    qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, FieldsSpans,
 };
 pub use error::{
     field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError,
@@ -79,7 +79,7 @@ pub use table_reference::{ResolvedTableReference, TableReference};
 pub use unnest::{RecursionUnnestOption, UnnestOptions};
 pub use utils::project_schema;
 pub use diagnostic::{Diagnostic, DiagnosticEntry, DiagnosticEntryKind};
-pub use with_span::WithSpan;
+pub use with_span::WithSpans;
 
 // These are hidden from docs purely to avoid polluting the public view of what this crate exports.
 // These are just re-exports of macros by the same name, which gets around the 'cannot refer to
diff --git a/datafusion/common/src/with_span.rs b/datafusion/common/src/with_span.rs
index d2acddaa0d2b..d0a953b40578 100644
--- a/datafusion/common/src/with_span.rs
+++ b/datafusion/common/src/with_span.rs
@@ -1,40 +1,54 @@
-use std::{cmp::Ordering, fmt::{self, Debug, Display}, ops::{Deref, DerefMut}};
+use std::{
+    cmp::Ordering,
+    fmt::{self, Debug, Display},
+    ops::{Deref, DerefMut},
+};
 
+use derivative::Derivative;
 use sqlparser::tokenizer::Span;
 
 /// A wrapper type for entitites that somehow refer to a location in the source
 /// code. For example, a [`DataType`](arrow_schema::DataType) can be tied to an
 /// expression which is being planned for execution, and the expression is in
 /// turn located somewhere in the source code.
-pub struct WithSpan<T> {
-    pub span: Span,
+#[derive(Clone, Derivative)]
+#[derivative(PartialEq, Eq, PartialOrd, Ord)]
+pub struct WithSpans<T> {
+    #[derivative(PartialEq = "ignore", PartialOrd = "ignore", Ord = "ignore")]
+    pub spans: Vec<Span>,
     pub inner: T,
 }
 
-impl<T> WithSpan<T> {
+impl<T> WithSpans<T> {
     pub fn into_inner(self) -> T {
         self.inner
     }
 
-    pub fn new(inner: T, span: Span) -> Self {
-        Self { span, inner }
+    pub fn new<IT>(inner: T, spans: IT) -> Self
+    where
+        IT: IntoIterator<Item = Span>,
+    {
+        Self {
+            spans: spans.into_iter().collect(),
+            inner,
+        }
     }
 
     pub fn new_without_span(inner: T) -> Self {
         Self {
-            span: Span::empty(),
+            spans: vec![],
             inner,
         }
     }
 }
 
-impl<T> From<T> for WithSpan<T> {
+impl<T> From<T> for WithSpans<T> {
     fn from(inner: T) -> Self {
         Self::new_without_span(inner)
     }
 }
 
-impl<T> Deref for WithSpan<T> {
+impl<T> Deref for WithSpans<T> {
     type Target = T;
 
     fn deref(&self) -> &Self::Target {
@@ -42,55 +56,26 @@ impl<T> Deref for WithSpan<T> {
     }
 }
 
-impl<T> DerefMut for WithSpan<T> {
+impl<T> DerefMut for WithSpans<T> {
     fn deref_mut(&mut self) -> &mut Self::Target {
         &mut self.inner
     }
 }
 
-impl<T> AsRef<T> for WithSpan<T> {
+impl<T> AsRef<T> for WithSpans<T> {
     fn as_ref(&self) -> &T {
         &self.inner
     }
 }
 
-impl<T: PartialEq> PartialEq for WithSpan<T> {
-    fn eq(&self, other: &Self) -> bool {
-        self.inner.eq(&other.inner)
-    }
-}
-
-impl<T: Eq> Eq for WithSpan<T> {}
-
-impl<T: PartialOrd> PartialOrd for WithSpan<T> {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        self.inner.partial_cmp(&other.inner)
-    }
-}
-
-impl<T: Ord> Ord for WithSpan<T> {
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.inner.cmp(&other.inner)
-    }
-}
-
-impl<T: Debug> Debug for WithSpan<T> {
+impl<T: Debug> Debug for WithSpans<T> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.inner.fmt(f)
     }
 }
 
-impl<T: Display> Display for WithSpan<T> {
+impl<T: Display> Display for WithSpans<T> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.inner.fmt(f)
     }
 }
-
-impl<T: Clone> Clone for WithSpan<T> {
-    fn clone(&self) -> Self {
-        Self {
-            span: self.span,
-            inner: self.inner.clone(),
-        }
-    }
-}
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 82ee52d7b2e3..ce0d145445db 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -1686,7 +1686,7 @@ impl DataFrame {
         let mut fields: Vec<Expr> = plan
             .schema()
             .iter()
-            .filter_map(|(qualifier, field)| {
+            .filter_map(|(qualifier, field, _)| {
                 if field.name() == name {
                     col_exists = true;
                     Some(new_column.clone())
@@ -1764,7 +1764,7 @@ impl DataFrame {
             .plan
             .schema()
             .iter()
-            .map(|(qualifier, field)| {
+            .map(|(qualifier, field, _)| {
                 if qualifier.eq(&qualifier_rename) && field.as_ref() == field_rename {
                     col(Column::from((qualifier, field))).alias(new_name)
                 } else {
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index 44537c951f94..96e1317a0a0c 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -963,8 +963,12 @@ impl DefaultPhysicalPlanner {
 
                     // Remove temporary projected columns
                     if left_projected || right_projected {
-                        let final_join_result =
-                            join_schema.iter().map(Expr::from).collect::<Vec<_>>();
+                        let final_join_result = join_schema
+                            .iter()
+                            .map(|(q, f, spans)| {
+                                Expr::from((q, f, spans.iter().copied()))
+                            })
+                            .collect::<Vec<_>>();
                         let projection = LogicalPlan::Projection(Projection::try_new(
                             final_join_result,
                             Arc::new(new_join),
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index 954807eae226..4000170e2824 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -31,7 +31,7 @@ use arrow::datatypes::{
 use datafusion_common::types::NativeType;
 use datafusion_common::{
     exec_datafusion_err, exec_err, internal_err, plan_datafusion_err, plan_err,
-    Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result, WithSpan,
+    Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result, WithSpans,
 };
 use itertools::Itertools;
 use sqlparser::tokenizer::Span;
@@ -72,16 +72,16 @@ impl Signature {
 
 /// Returns a [`Signature`] for applying `op` to arguments of type `lhs` and `rhs`
 fn signature<'a>(
-    lhs: impl Into<WithSpan<&'a DataType>>,
-    op: impl Into<WithSpan<&'a Operator>>,
-    rhs: impl Into<WithSpan<&'a DataType>>,
+    lhs: impl Into<WithSpans<&'a DataType>>,
+    op: impl Into<WithSpans<&'a Operator>>,
+    rhs: impl Into<WithSpans<&'a DataType>>,
 ) -> Result<Signature> {
     use arrow::datatypes::DataType::*;
     use Operator::*;
 
-    let lhs: WithSpan<_> = lhs.into();
-    let op: WithSpan<_> = op.into();
-    let rhs: WithSpan<_> = rhs.into();
+    let lhs: WithSpans<_> = lhs.into();
+    let op: WithSpans<_> = op.into();
+    let rhs: WithSpans<_> = rhs.into();
 
     match *op {
         Eq |
@@ -201,28 +201,28 @@ fn signature<'a>(
                 plan_err!(
                     "Cannot coerce arithmetic expression {lhs} {op} {rhs} to valid types"
                 ).map_err(|err | {
-                    if lhs.span == Span::empty() || rhs.span == Span::empty() {
-                        err
-                    } else {
+                    if let (Some(&lhs_span), Some(&rhs_span)) = (lhs.spans.first(), rhs.spans.first()) {
                         err.with_diagnostic(|_| Diagnostic {
                             entries: vec![
                                 DiagnosticEntry {
                                     kind: DiagnosticEntryKind::Error,
                                     message: "Binary expression has invalid types".to_string(),
-                                    span: lhs.span.union(&rhs.span),
+                                    span: lhs_span.union(&rhs_span),
                                 },
                                 DiagnosticEntry {
                                     kind: DiagnosticEntryKind::Note,
                                     message: format!("Is of type {}", *lhs),
-                                    span: lhs.span,
+                                    span: lhs_span,
                                 },
                                 DiagnosticEntry {
                                     kind: DiagnosticEntryKind::Note,
                                     message: format!("Is of type {}", *rhs),
-                                    span: rhs.span,
+                                    span: rhs_span,
                                 }
                             ]
                         })
+                    } else {
+                        err
                     }
                 })
             }
@@ -232,18 +232,18 @@ fn signature<'a>(
 
 /// Returns the resulting type of a binary expression evaluating the `op` with the left and right hand types
 pub fn get_result_type<'a>(
-    lhs: impl Into<WithSpan<&'a DataType>>,
-    op: impl Into<WithSpan<&'a Operator>>,
-    rhs: impl Into<WithSpan<&'a DataType>>,
+    lhs: impl Into<WithSpans<&'a DataType>>,
+    op: impl Into<WithSpans<&'a Operator>>,
+    rhs: impl Into<WithSpans<&'a DataType>>,
 ) -> Result<DataType> {
     signature(lhs, op, rhs).map(|sig| sig.ret)
 }
 
 /// Returns the coerced input types for a binary expression evaluating the `op` with the left and right hand types
 pub fn get_input_types<'a>(
-    lhs: impl Into<WithSpan<&'a DataType>>,
-    op: impl Into<WithSpan<&'a Operator>>,
-    rhs: impl Into<WithSpan<&'a DataType>>,
+    lhs: impl Into<WithSpans<&'a DataType>>,
+    op: impl Into<WithSpans<&'a Operator>>,
+    rhs: impl Into<WithSpans<&'a DataType>>,
 ) -> Result<(DataType, DataType)> {
     signature(lhs, op, rhs).map(|sig| (sig.lhs, sig.rhs))
 }
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index c5eaa4bf88e4..5fd4faa06242 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -354,6 +354,16 @@ impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
     }
 }
 
+/// Same as [`From<(Option<&'a TableReference>, &'a FieldRef)>`] but with a span
+impl<'a, IT> From<(Option<&'a TableReference>, &'a FieldRef, IT)> for Expr
+where
+    IT: IntoIterator<Item = Span>,
+{
+    fn from(value: (Option<&'a TableReference>, &'a FieldRef, IT)) -> Self {
+        Expr::from(Column::from((value.0, value.1)).with_spans(value.2))
+    }
+}
+
 impl<'a> TreeNodeContainer<'a, Self> for Expr {
     fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>(
         &'a self,
@@ -1114,7 +1124,11 @@ impl Expr {
     /// output schema. We can use this qualified name to reference the field.
     pub fn qualified_name(&self) -> (Option<TableReference>, String) {
         match self {
-            Expr::Column(Column { relation, name, span: _ }) => (relation.clone(), name.clone()),
+            Expr::Column(Column {
+                relation,
+                name,
+                spans: _,
+            }) => (relation.clone(), name.clone()),
             Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
             _ => (None, self.schema_name().to_string()),
         }
@@ -1667,9 +1681,10 @@ impl Expr {
         }
     }
 
-    pub fn get_span(&self) -> Option<Span> {
+    pub fn get_spans(&self) -> Option<&Vec<Span>> {
         match self {
-            Expr::Column(Column{span, ..}) => Some(*span),
+            Expr::Column(Column { spans, .. }) => Some(spans),
+            Expr::Alias(Alias { expr, .. }) => expr.get_spans(),
             _ => None,
         }
     }
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index c32f217c933b..2c5c83999038 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -160,7 +160,7 @@ pub fn unnormalize_col(expr: Expr) -> Expr {
                 let col = Column {
                     relation: None,
                     name: c.name,
-                    span: c.span,
+                    spans: c.spans,
                 };
                 Transformed::yes(Expr::Column(col))
             } else {
@@ -182,10 +182,14 @@ pub fn create_col_from_scalar_expr(
             Some::<TableReference>(subqry_alias.into()),
             name,
         )),
-        Expr::Column(Column { relation: _, name, span }) => Ok(Column::new(
-            Some::<TableReference>(subqry_alias.into()),
+        Expr::Column(Column {
+            relation: _,
             name,
-        ).with_span(*span)),
+            spans,
+        }) => Ok(
+            Column::new(Some::<TableReference>(subqry_alias.into()), name)
+                .with_spans(spans.iter().copied()),
+        ),
         _ => {
             let scalar_column = scalar_expr.schema_name().to_string();
             Ok(Column::new(
@@ -232,7 +236,11 @@ pub fn coerce_plan_expr_for_schema(
             Ok(LogicalPlan::Projection(projection))
         }
         _ => {
-            let exprs: Vec<Expr> = plan.schema().iter().map(Expr::from).collect();
+            let exprs: Vec<Expr> = plan
+                .schema()
+                .iter()
+                .map(|(q, f, spans)| Expr::from((q, f, spans.iter().copied())))
+                .collect();
             let new_exprs = coerce_exprs_for_schema(exprs, plan.schema(), schema)?;
             let add_project = new_exprs.iter().any(|expr| expr.try_as_col().is_none());
             if add_project {
diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs
index c49197f78eb9..35d0fc2807db 100644
--- a/datafusion/expr/src/expr_rewriter/order_by.rs
+++ b/datafusion/expr/src/expr_rewriter/order_by.rs
@@ -103,7 +103,7 @@ fn rewrite_in_terms_of_projection(
             relation: None,
             name,
             // Span is not used in PartialEq
-            span: Span::empty(),
+            spans: vec![],
         });
 
         // look for the column named the same as this expr
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index 88dd74ded7d5..96ff975cf9e3 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -29,7 +29,7 @@ use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field};
 use datafusion_common::{
     not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result,
-    TableReference, WithSpan,
+    TableReference, WithSpans,
 };
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use recursive::recursive;
@@ -405,16 +405,16 @@ impl ExprSchemable for Expr {
             }) => {
                 let (left_type, left_is_nullable) =
                     left.data_type_and_nullable(schema)?;
-                let left_type = if let Some(span) = left.get_span() {
-                    WithSpan::new(&left_type, span)
+                let left_type = if let Some(spans) = left.get_spans() {
+                    WithSpans::new(&left_type, spans.iter().copied())
                 } else {
                     (&left_type).into()
                 };
 
                 let (right_type, right_is_nullable) =
                     right.data_type_and_nullable(schema)?;
-                let right_type = if let Some(span) = right.get_span() {
-                    WithSpan::new(&right_type, span)
+                let right_type = if let Some(spans) = right.get_spans() {
+                    WithSpans::new(&right_type, spans.iter().copied())
                 } else {
                     (&right_type).into()
                 };
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 4a8c5d08e284..0e810a623b96 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -52,13 +52,11 @@ use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
 use datafusion_common::display::ToStringifiedPlan;
 use datafusion_common::file_options::file_type::FileType;
 use datafusion_common::{
-    exec_err, get_target_functional_dependencies, internal_err, not_impl_err,
-    plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, DataFusionError,
-    FunctionalDependencies, Result, ScalarValue, TableReference, ToDFSchema,
-    UnnestOptions,
+    exec_err, get_target_functional_dependencies, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, DataFusionError, FieldsSpans, FunctionalDependencies, Result, ScalarValue, TableReference, ToDFSchema, UnnestOptions
 };
 use datafusion_expr_common::type_coercion::binary::type_union_resolution;
 use indexmap::IndexSet;
+use sqlparser::tokenizer::Span;
 
 /// Default table name for unnamed table
 pub const UNNAMED_TABLE: &str = "?table?";
@@ -1340,7 +1338,7 @@ pub fn change_redundant_column(fields: &Fields) -> Vec<Field> {
 fn mark_field(schema: &DFSchema) -> (Option<TableReference>, Arc<Field>) {
     let mut table_references = schema
         .iter()
-        .filter_map(|(qualifier, _)| qualifier)
+        .filter_map(|(qualifier, _, _)| qualifier)
         .collect::<Vec<_>>();
     table_references.dedup();
     let table_reference = if table_references.len() == 1 {
@@ -1374,8 +1372,8 @@ pub fn build_join_schema(
             .collect()
     }
 
-    let right_fields = right.iter();
-    let left_fields = left.iter();
+    let right_fields = right.iter().map(|(q, f, _)| (q, f));
+    let left_fields = left.iter().map(|(q, f, _)| (q, f));
 
     let qualified_fields: Vec<(Option<TableReference>, Arc<Field>)> = match join_type {
         JoinType::Inner => {
@@ -1437,6 +1435,9 @@ pub fn build_join_schema(
         join_type,
         left.fields().len(),
     );
+    let left_fields_spans: FieldsSpans = left.iter().map(|(_, _, spans)| spans).cloned().collect();
+    let right_fields_spans: FieldsSpans = right.iter().map(|(_, _, spans)| spans).cloned().collect();
+    let fields_spans = left_fields_spans.join(&right_fields_spans, join_type, left.fields().len());
     let metadata = left
         .metadata()
         .clone()
@@ -1444,7 +1445,8 @@ pub fn build_join_schema(
         .chain(right.metadata().clone())
         .collect();
     let dfschema = DFSchema::new_with_metadata(qualified_fields, metadata)?;
-    dfschema.with_functional_dependencies(func_dependencies)
+    let dfschema = dfschema.with_functional_dependencies(func_dependencies)?;
+    Ok(dfschema.with_fields_spans(fields_spans))
 }
 
 /// Add additional "synthetic" group by expressions based on functional
@@ -1863,7 +1865,7 @@ pub fn unnest_with_options(
     let fields = input_schema
         .iter()
         .enumerate()
-        .map(|(index, (original_qualifier, original_field))| {
+        .map(|(index, (original_qualifier, original_field, _))| {
             match indices_to_unnest.get(&index) {
                 Some(column_to_unnest) => {
                     let recursions_on_column = options
@@ -2212,7 +2214,7 @@ mod tests {
                         Column {
                             relation: Some(TableReference::Bare { table }),
                             name,
-                            span: _,
+                            spans: _,
                         },
                 },
                 _,
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index fe725e7d96de..af6c0ffec5cd 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -2163,13 +2163,19 @@ impl Projection {
 /// produced by the projection operation. If the schema computation is successful,
 /// the `Result` will contain the schema; otherwise, it will contain an error.
 pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSchema>> {
+
     let metadata = input.schema().metadata().clone();
+    let fields_spans = exprs
+        .iter()
+        .map(|e| e.get_spans().cloned().unwrap_or_else(|| vec![]))
+        .collect();
 
     let schema =
         DFSchema::new_with_metadata(exprlist_to_fields(exprs, input)?, metadata)?
             .with_functional_dependencies(calc_func_dependencies_for_project(
                 exprs, input,
-            )?)?;
+            )?)?
+            .with_fields_spans(fields_spans);
 
     Ok(Arc::new(schema))
 }
@@ -2200,9 +2206,11 @@ impl SubqueryAlias {
         // Since schema is the same, other than qualifier, we can use existing
         // functional dependencies:
         let func_dependencies = plan.schema().functional_dependencies().clone();
+        let fields_spans = plan.schema().fields_spans().clone();
         let schema = DFSchemaRef::new(
             DFSchema::try_from_qualified_schema(alias.clone(), &schema)?
-                .with_functional_dependencies(func_dependencies)?,
+                .with_functional_dependencies(func_dependencies)?
+                .with_fields_spans(fields_spans),
         );
         Ok(SubqueryAlias {
             input: plan,
@@ -2379,7 +2387,7 @@ impl Window {
         let fields: Vec<(Option<TableReference>, Arc<Field>)> = input
             .schema()
             .iter()
-            .map(|(q, f)| (q.cloned(), Arc::clone(f)))
+            .map(|(q, f, _)| (q.cloned(), Arc::clone(f)))
             .collect();
         let input_len = fields.len();
         let mut window_fields = fields;
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 6f7c5d379260..85ab065b7962 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -363,7 +363,10 @@ fn get_exprs_except_skipped(
     columns_to_skip: HashSet<Column>,
 ) -> Vec<Expr> {
     if columns_to_skip.is_empty() {
-        schema.iter().map(Expr::from).collect::<Vec<Expr>>()
+        schema
+            .iter()
+            .map(|(q, f, spans)| Expr::from((q, f, spans.iter().copied())))
+            .collect::<Vec<Expr>>()
     } else {
         schema
             .columns()
diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
index 1853f0aaf3ec..225b6098a9eb 100644
--- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
+++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
@@ -123,7 +123,7 @@ fn expand_exprlist(input: &LogicalPlan, expr: Vec<Expr>) -> Result<Vec<Expr>> {
             Expr::Column(Column {
                 ref relation,
                 ref name,
-                span,
+                ref spans,
             }) => {
                 if name.eq("*") {
                     let expanded_columns = if let Some(qualifier) = relation {
@@ -142,7 +142,7 @@ fn expand_exprlist(input: &LogicalPlan, expr: Vec<Expr>) -> Result<Vec<Expr>> {
 
                     projected_expr.extend(expanded_columns.into_iter().map(|mut expr| {
                         if let Expr::Column(c) = &mut expr {
-                            c.span = span;
+                            c.spans = spans.clone();
                         }
                         expr
                     }));
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 1bd4009b47cc..c5aeb0ccddc9 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -574,7 +574,7 @@ fn transform_schema_to_nonview(dfschema: &DFSchemaRef) -> Option<Result<DFSchema
     let (qualifiers, transformed_fields): (Vec<Option<TableReference>>, Vec<Arc<Field>>) =
         dfschema
             .iter()
-            .map(|(qualifier, field)| match field.data_type() {
+            .map(|(qualifier, field, _)| match field.data_type() {
                 DataType::Utf8View => {
                     transformed = true;
                     (
@@ -974,7 +974,7 @@ pub fn coerce_union_schema(inputs: &[Arc<LogicalPlan>]) -> Result<DFSchema> {
         union_nullabilities,
         union_field_meta.into_iter()
     )
-    .map(|((qualifier, field), datatype, nullable, metadata)| {
+    .map(|((qualifier, field, _), datatype, nullable, metadata)| {
         let mut field = Field::new(field.name().clone(), datatype, nullable);
         field.set_metadata(metadata);
         (qualifier.cloned(), field.into())
@@ -1000,7 +1000,7 @@ fn project_with_column_index(
             Expr::Column(Column {
                 relation: _,
                 ref name,
-                span: _,
+                spans: _,
             }) if name != schema.field(i).name() => e.alias(schema.field(i).name()),
             Expr::Alias { .. } | Expr::Column { .. } => e,
             _ => e.alias(schema.field(i).name()),
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 0ea2d24effbb..163d0966aff6 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -751,7 +751,7 @@ fn build_common_expr_project_plan(
         })
         .collect::<Result<Vec<_>>>()?;
 
-    for (qualifier, field) in input.schema().iter() {
+    for (qualifier, field, _) in input.schema().iter() {
         if fields_set.insert(qualified_name(qualifier, field.name())) {
             project_exprs.push(Expr::from((qualifier, field)));
         }
@@ -769,7 +769,10 @@ fn build_recover_project_plan(
     schema: &DFSchema,
     input: LogicalPlan,
 ) -> Result<LogicalPlan> {
-    let col_exprs = schema.iter().map(Expr::from).collect();
+    let col_exprs = schema
+        .iter()
+        .map(|(q, f, spans)| Expr::from((q, f, spans.iter().copied())))
+        .collect();
     Projection::try_new(col_exprs, Arc::new(input)).map(LogicalPlan::Projection)
 }
 
diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs
index 5ebd73799dd5..5c519dce8fbb 100644
--- a/datafusion/optimizer/src/decorrelate.rs
+++ b/datafusion/optimizer/src/decorrelate.rs
@@ -532,7 +532,7 @@ fn proj_exprs_evaluation_result_on_empty_batch(
             let result_expr = simplifier.simplify(result_expr)?;
             let expr_name = match expr {
                 Expr::Alias(Alias { name, .. }) => name.to_string(),
-                Expr::Column(Column { relation: _, name, span: _ }) => name.to_string(),
+                Expr::Column(Column { relation: _, name, spans: _ }) => name.to_string(),
                 _ => expr.schema_name().to_string(),
             };
             expr_result_map_for_count_bug.insert(expr_name, result_expr);
diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs
index fe751a5fb583..ba4bd5734fb0 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -238,7 +238,7 @@ impl<'a> ColumnChecker<'a> {
 fn schema_columns(schema: &DFSchema) -> HashSet<Column> {
     schema
         .iter()
-        .flat_map(|(qualifier, field)| {
+        .flat_map(|(qualifier, field, _)| {
             [
                 Column::new(qualifier.cloned(), field.name()),
                 // we need to push down filter using unqualified column as well
@@ -818,7 +818,7 @@ impl OptimizerRule for PushDownFilter {
             }
             LogicalPlan::SubqueryAlias(subquery_alias) => {
                 let mut replace_map = HashMap::new();
-                for (i, (qualifier, field)) in
+                for (i, (qualifier, field, _)) in
                     subquery_alias.input.schema().iter().enumerate()
                 {
                     let (sub_qualifier, sub_field) =
@@ -910,7 +910,7 @@ impl OptimizerRule for PushDownFilter {
                 let mut inputs = Vec::with_capacity(union.inputs.len());
                 for input in &union.inputs {
                     let mut replace_map = HashMap::new();
-                    for (i, (qualifier, field)) in input.schema().iter().enumerate() {
+                    for (i, (qualifier, field, _)) in input.schema().iter().enumerate() {
                         let (union_qualifier, union_field) =
                             union.schema.qualified_field(i);
                         replace_map.insert(
@@ -1156,7 +1156,7 @@ fn rewrite_projection(
         .schema
         .iter()
         .zip(projection.expr.iter())
-        .map(|((qualifier, field), expr)| {
+        .map(|((qualifier, field, _), expr)| {
             // strip alias, as they should not be part of filters
             let expr = expr.clone().unalias();
 
diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs
index 48b2828faf45..4787ba8d4ca7 100644
--- a/datafusion/optimizer/src/replace_distinct_aggregate.rs
+++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs
@@ -157,7 +157,7 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
                     .iter()
                     .skip(expr_cnt)
                     .zip(schema.iter())
-                    .map(|((new_qualifier, new_field), (old_qualifier, old_field))| {
+                    .map(|((new_qualifier, new_field, _), (old_qualifier, old_field, _))| {
                         col(Column::from((new_qualifier, new_field)))
                             .alias_qualified(old_qualifier.cloned(), old_field.name())
                     })
diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs
index 14c7bb8f51c1..735221eae6d5 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -63,7 +63,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 return Ok(Expr::Column(Column {
                     relation: qualifier.filter(|q| q.table() != UNNAMED_TABLE).cloned(),
                     name: normalize_ident,
-                    span: id_span,
+                    spans: vec![id_span],
                 }));
             }
 
@@ -84,7 +84,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             Ok(Expr::Column(Column {
                 relation: None,
                 name: normalize_ident,
-                span: id_span,
+                spans: vec![id_span],
             }))
         }
     }
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 0195f3d32032..c08a9fd44e04 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -150,14 +150,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         match expr {
             Expr::Column(col) => match &col.relation {
                 Some(q) => {
-                    match schema.iter().find(|(qualifier, field)| match qualifier {
+                    match schema.iter().find(|(qualifier, field, _)| match qualifier {
                         Some(field_q) => {
                             field.name() == &col.name
                                 && field_q.to_string().ends_with(&format!(".{q}"))
                         }
                         _ => false,
                     }) {
-                        Some((qualifier, df_field)) => Expr::from((qualifier, df_field)),
+                        Some((qualifier, df_field, _)) => Expr::from((qualifier, df_field)),
                         None => Expr::Column(col),
                     }
                 }
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 0b1481544d05..bb5aa305bc8b 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -38,6 +38,7 @@ use datafusion_common::{
 use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder};
 use datafusion_expr::utils::find_column_exprs;
 use datafusion_expr::{col, Expr};
+use sqlparser::tokenizer::Span;
 
 use crate::utils::{make_decimal_type, value_to_string};
 pub use datafusion_expr::planner::ContextProvider;
@@ -393,7 +394,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                                             col.name, r
                                         ),
                                         DiagnosticEntryKind::Error,
-                                        col.span,
+                                        col.spans.first().copied().unwrap_or(Span::empty()),
                                     )])
                                 })
                             },
@@ -414,7 +415,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                             Diagnostic::new([DiagnosticEntry::new(
                                 format!("No field named '{}'", col.name),
                                 DiagnosticEntryKind::Error,
-                                col.span,
+                                col.spans.first().copied().unwrap_or(Span::empty()),
                             )])
                         })
                 }),
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 6d1b3f185ae1..48abe99cc941 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -814,7 +814,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 Diagnostic::new([DiagnosticEntry::new(
                     "GROUP BY clause is here",
                     DiagnosticEntryKind::Note,
-                    Span::union_iter(group_by_exprs.iter().filter_map(|e| e.get_span())),
+                    Span::union_iter(
+                        group_by_exprs
+                            .iter()
+                            .flat_map(|e| e.get_spans().cloned().unwrap_or_default()),
+                    ),
                 )])
             })
         })?;
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 47a842643b8d..572ac8221b1c 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1739,7 +1739,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         // Build updated values for each column, using the previous value if not modified
         let exprs = table_schema
             .iter()
-            .map(|(qualifier, field)| {
+            .map(|(qualifier, field, _)| {
                 let expr = match assign_map.remove(field.name()) {
                     Some(new_value) => {
                         let mut expr = self.sql_to_expr(
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 5f015c6465fc..8118aa7fa845 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -911,12 +911,12 @@ impl Unparser<'_> {
                     Expr::Column(Column {
                         relation: _,
                         name: left_name,
-                        span: _,
+                        spans: _,
                     }),
                     Expr::Column(Column {
                         relation: _,
                         name: right_name,
-                        span: _,
+                        spans: _,
                     }),
                 ) if left_name == right_name => {
                     idents.push(self.new_ident_quoted_if_needs(left_name.to_string()));
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index b2133d463f87..cfe9feb7ce03 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -101,8 +101,12 @@ pub enum CheckColumnsSatisfyExprsPurpose {
 impl CheckColumnsSatisfyExprsPurpose {
     fn get_error_message(&self) -> &'static str {
         match self {
-            CheckColumnsSatisfyExprsPurpose::GroupBy => "Projection references non-aggregate values",
-            CheckColumnsSatisfyExprsPurpose::Having => "HAVING clause references non-aggregate values",
+            CheckColumnsSatisfyExprsPurpose::GroupBy => {
+                "Projection references non-aggregate values"
+            }
+            CheckColumnsSatisfyExprsPurpose::Having => {
+                "HAVING clause references non-aggregate values"
+            }
         }
     }
 }
@@ -158,19 +162,30 @@ fn check_column_satisfies_expr(
         )
         .map_err(|err| {
             let message = match call_purpose {
-                CheckColumnsSatisfyExprsPurpose::GroupBy => format!("'{}' in projection does not appear in GROUP BY clause", expr.to_string()),
-                CheckColumnsSatisfyExprsPurpose::Having => format!("'{}' in HAVING clause does not appear in GROUP BY clause", expr.to_string()),
+                CheckColumnsSatisfyExprsPurpose::GroupBy => format!(
+                    "'{}' in projection does not appear in GROUP BY clause",
+                    expr.to_string()
+                ),
+                CheckColumnsSatisfyExprsPurpose::Having => format!(
+                    "'{}' in HAVING clause does not appear in GROUP BY clause",
+                    expr.to_string()
+                ),
             };
             err.with_diagnostic(|_| {
-                Diagnostic::new([DiagnosticEntry::new(
-                    message,
-                    DiagnosticEntryKind::Error,
-                    expr.get_span().unwrap_or(Span::empty()),
-                ),DiagnosticEntry::new(
-                    format!("Add '{}' to GROUP BY clause", expr.to_string()),
-                    DiagnosticEntryKind::Help,
-                    Span::empty(),
-                )])
+                Diagnostic::new([
+                    DiagnosticEntry::new(
+                        message,
+                        DiagnosticEntryKind::Error,
+                        expr.get_spans()
+                            .and_then(|spans| spans.first().copied())
+                            .unwrap_or(Span::empty()),
+                    ),
+                    DiagnosticEntry::new(
+                        format!("Add '{}' to GROUP BY clause", expr.to_string()),
+                        DiagnosticEntryKind::Help,
+                        Span::empty(),
+                    ),
+                ])
             })
         });
     }

From 03afbf675fd2353d6b27f77c80faa5ae417e0d0e Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Tue, 10 Dec 2024 10:04:09 +0100
Subject: [PATCH 6/8] feat: UNION wrong number of fields diagnostics

---
 datafusion/expr/src/logical_plan/builder.rs | 42 ++++++++++++++++++---
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index da3903615bec..445cf3dea3a6 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -52,7 +52,11 @@ use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
 use datafusion_common::display::ToStringifiedPlan;
 use datafusion_common::file_options::file_type::FileType;
 use datafusion_common::{
-    exec_err, get_target_functional_dependencies, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, DataFusionError, FieldsSpans, FunctionalDependencies, Result, ScalarValue, TableReference, ToDFSchema, UnnestOptions
+    exec_err, get_target_functional_dependencies, internal_err, not_impl_err,
+    plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, DataFusionError,
+    Diagnostic, DiagnosticEntry, DiagnosticEntryKind, FieldsSpans,
+    FunctionalDependencies, Result, ScalarValue, TableReference, ToDFSchema,
+    UnnestOptions,
 };
 use datafusion_expr_common::type_coercion::binary::type_union_resolution;
 
@@ -1436,9 +1440,12 @@ pub fn build_join_schema(
         join_type,
         left.fields().len(),
     );
-    let left_fields_spans: FieldsSpans = left.iter().map(|(_, _, spans)| spans).cloned().collect();
-    let right_fields_spans: FieldsSpans = right.iter().map(|(_, _, spans)| spans).cloned().collect();
-    let fields_spans = left_fields_spans.join(&right_fields_spans, join_type, left.fields().len());
+    let left_fields_spans: FieldsSpans =
+        left.iter().map(|(_, _, spans)| spans).cloned().collect();
+    let right_fields_spans: FieldsSpans =
+        right.iter().map(|(_, _, spans)| spans).cloned().collect();
+    let fields_spans =
+        left_fields_spans.join(&right_fields_spans, join_type, left.fields().len());
     let metadata = left
         .metadata()
         .clone()
@@ -1526,7 +1533,32 @@ pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalP
             left has {} columns whereas right has {} columns",
             left_plan.schema().fields().len(),
             right_plan.schema().fields().len()
-        );
+        )
+        .map_err(|err| {
+            err.with_diagnostic(|_| {
+                Diagnostic::new([
+                    DiagnosticEntry::new(
+                        "UNION queries have different number of columns",
+                        DiagnosticEntryKind::Error,
+                        Span::empty(),
+                    ),
+                    DiagnosticEntry::new(
+                        format!("This side has {} columns", left_plan.schema().fields().len()),
+                        DiagnosticEntryKind::Note,
+                        Span::union_iter(
+                            left_plan.schema().fields_spans().iter().flatten().copied(),
+                        ),
+                    ),
+                    DiagnosticEntry::new(
+                        format!("This side has {} columns", right_plan.schema().fields().len()),
+                        DiagnosticEntryKind::Note,
+                        Span::union_iter(
+                            right_plan.schema().fields_spans().iter().flatten().copied(),
+                        ),
+                    ),
+                ])
+            })
+        });
     }
 
     // Temporarily use the schema from the left input and later rely on the analyzer to

From 2d3e2a1c18e015b1a5ed7be84890f2a46e2bad6b Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Tue, 10 Dec 2024 12:23:24 +0100
Subject: [PATCH 7/8] feat: collect more errors

---
 datafusion/common/src/error.rs | 64 ++++++++++++++++++++++++++++++++++
 datafusion/expr/src/utils.rs   | 20 +++++++----
 datafusion/sql/src/utils.rs    | 20 ++++++++---
 3 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index f1a816ceac29..861fff185c75 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -133,6 +133,7 @@ pub enum DataFusionError {
     /// or serializing/deserializing protobytes to Substrait plans
     Substrait(String),
     Diagnostic(Diagnostic, Box<DataFusionError>),
+    Collection(Vec<DataFusionError>),
 }
 
 #[macro_export]
@@ -319,6 +320,7 @@ impl Error for DataFusionError {
             DataFusionError::Context(_, e) => Some(e.as_ref()),
             DataFusionError::Substrait(_) => None,
             DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
+            DataFusionError::Collection(_) => None,
         }
     }
 }
@@ -442,6 +444,7 @@ impl DataFusionError {
             DataFusionError::Context(_, _) => "",
             DataFusionError::Substrait(_) => "Substrait error: ",
             DataFusionError::Diagnostic(_, _) => "",
+            DataFusionError::Collection(_) => "",
         }
     }
 
@@ -483,6 +486,13 @@ impl DataFusionError {
             }
             DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
             DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
+            DataFusionError::Collection(ref v) => Cow::Owned(format!(
+                "[{}]",
+                v.iter()
+                    .map(|e| e.to_string())
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            )),
         }
     }
 
@@ -517,6 +527,60 @@ impl DataFusionError {
 
         DiagnosticsIterator { head: self }
     }
+
+    pub fn get_individual_errors(&self) -> impl Iterator<Item = &Self> + '_ {
+        fn contains_collection(err: &DataFusionError) -> bool {
+            let mut head = err;
+            loop {
+                if let DataFusionError::Collection(_) = head {
+                    return true;
+                }
+
+                if let Some(source) = head
+                    .source()
+                    .map(|source| source.downcast_ref::<DataFusionError>())
+                    .flatten()
+                {
+                    head = source;
+                } else {
+                    return false;
+                }
+            }
+        }
+
+        struct IndividualErrorsIterator<'a> {
+            queue: Vec<&'a DataFusionError>,
+        }
+
+        impl<'a> Iterator for IndividualErrorsIterator<'a> {
+            type Item = &'a DataFusionError;
+
+            fn next(&mut self) -> Option<Self::Item> {
+                while let Some(err) = self.queue.pop() {
+                    if !contains_collection(err) {
+                        return Some(err);
+                    }
+
+                    if let DataFusionError::Collection(errs) = err {
+                        self.queue.extend(errs.iter());
+                        continue;
+                    }
+
+                    if let Some(source) = err
+                        .source()
+                        .map(|source| source.downcast_ref::<DataFusionError>())
+                        .flatten()
+                    {
+                        self.queue.push(source);
+                    }
+                }
+
+                None
+            }
+        }
+
+        IndividualErrorsIterator { queue: vec![self] }
+    }
 }
 
 /// Unwrap an `Option` if possible. Otherwise return an `DataFusionError::Internal`.
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 85ab065b7962..5d9556a2f040 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -703,7 +703,7 @@ pub fn exprlist_to_fields<'a>(
     // Look for exact match in plan's output schema
     let wildcard_schema = find_base_plan(plan).schema();
     let input_schema = plan.schema();
-    let result = exprs
+    let (fields, errs) = exprs
         .into_iter()
         .map(|e| match e {
             Expr::Wildcard { qualifier, options } => match qualifier {
@@ -759,11 +759,19 @@ pub fn exprlist_to_fields<'a>(
             },
             _ => Ok(vec![e.to_field(input_schema)?]),
         })
-        .collect::<Result<Vec<_>>>()?
-        .into_iter()
-        .flatten()
-        .collect();
-    Ok(result)
+        .fold((vec![], vec![]), |(mut fields, mut errs), result| {
+            match result {
+                Ok(this_fields) => fields.extend(this_fields),
+                Err(err) => errs.push(err),
+            }
+            (fields, errs)
+        });
+
+    if !errs.is_empty() {
+        Err(DataFusionError::Collection(errs))
+    } else {
+        Ok(fields)
+    }
 }
 
 /// Find the suitable base plan to expand the wildcard expression recursively.
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index d4cdd5379389..432d04510c6a 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -123,29 +123,41 @@ pub(crate) fn check_columns_satisfy_exprs(
         Expr::Column(_) => Ok(()),
         _ => internal_err!("Expr::Column are required"),
     })?;
+    let mut errs = vec![];
     let column_exprs = find_column_exprs(exprs);
     for e in &column_exprs {
         match e {
             Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
                 for e in exprs {
-                    check_column_satisfies_expr(columns, e, call_purpose)?;
+                    if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                        errs.push(err);
+                    }
                 }
             }
             Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
                 for e in exprs {
-                    check_column_satisfies_expr(columns, e, call_purpose)?;
+                    if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                        errs.push(err);
+                    }
                 }
             }
             Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
                 for exprs in lists_of_exprs {
                     for e in exprs {
-                        check_column_satisfies_expr(columns, e, call_purpose)?;
+                        if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                            errs.push(err);
+                        }
                     }
                 }
             }
-            _ => check_column_satisfies_expr(columns, e, call_purpose)?,
+            _ => if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                errs.push(err);
+            },
         }
     }
+    if !errs.is_empty() {
+        return Err(DataFusionError::Collection(errs));
+    }
     Ok(())
 }
 

From 0506a80838ba3a40120089474bb727b94db30b81 Mon Sep 17 00:00:00 2001
From: Elia Perantoni <eper@wayfare.ai>
Date: Tue, 10 Dec 2024 12:47:20 +0100
Subject: [PATCH 8/8] feat: ignorable errors

---
 datafusion/common/src/column.rs               |  7 +-
 .../common/src/dfschema/fields_spans.rs       |  4 +-
 datafusion/common/src/dfschema/mod.rs         |  1 -
 datafusion/common/src/error.rs                | 37 +++++---
 datafusion/common/src/lib.rs                  |  6 +-
 datafusion/common/src/with_span.rs            |  1 -
 .../expr-common/src/type_coercion/binary.rs   |  1 -
 datafusion/expr/Cargo.toml                    |  1 +
 datafusion/expr/src/expr.rs                   | 37 +++++++-
 datafusion/expr/src/expr_rewriter/mod.rs      |  1 +
 datafusion/expr/src/expr_rewriter/order_by.rs |  1 -
 datafusion/expr/src/expr_schema.rs            |  9 +-
 datafusion/expr/src/logical_plan/builder.rs   | 10 +-
 datafusion/expr/src/logical_plan/plan.rs      |  6 +-
 datafusion/expr/src/tree_node.rs              | 11 ++-
 .../src/analyzer/expand_wildcard_rule.rs      | 27 +++---
 .../optimizer/src/analyzer/type_coercion.rs   |  1 +
 datafusion/optimizer/src/decorrelate.rs       |  7 +-
 .../optimizer/src/optimize_projections/mod.rs |  1 +
 .../src/replace_distinct_aggregate.rs         | 13 ++-
 datafusion/proto-common/src/to_proto/mod.rs   |  2 +-
 datafusion/sql/src/expr/identifier.rs         |  5 +-
 datafusion/sql/src/expr/mod.rs                |  4 +-
 datafusion/sql/src/planner.rs                 |  5 +-
 datafusion/sql/src/relation/join.rs           | 57 +++++++----
 datafusion/sql/src/relation/mod.rs            |  9 +-
 datafusion/sql/src/select.rs                  | 94 +++++++++++++++----
 datafusion/sql/src/statement.rs               |  9 +-
 datafusion/sql/src/unparser/plan.rs           |  2 +
 datafusion/sql/src/unparser/rewrite.rs        |  1 +
 datafusion/sql/src/utils.rs                   | 31 +++---
 31 files changed, 276 insertions(+), 125 deletions(-)

diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index 0ddd1f0a4112..29be76e56d38 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -33,8 +33,8 @@ use std::fmt;
 use std::str::FromStr;
 
 /// A named reference to a qualified field in a schema.
-#[derive(Debug, Clone, Derivative)]
-#[derivative(PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Debug, Derivative)]
+#[derivative(PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
 pub struct Column {
     /// relation/table reference.
     pub relation: Option<TableReference>,
@@ -254,8 +254,7 @@ impl Column {
                         .flat_map(|s| s.columns_with_unqualified_name(&self.name))
                         .collect::<Vec<_>>();
                     for using_col in using_columns {
-                        let all_matched =
-                            columns.iter().all(|c| using_col.contains(c));
+                        let all_matched = columns.iter().all(|c| using_col.contains(c));
                         // All matched fields belong to the same using column set, in orther words
                         // the same join clause. We simply pick the qualifier from the first match.
                         if all_matched {
diff --git a/datafusion/common/src/dfschema/fields_spans.rs b/datafusion/common/src/dfschema/fields_spans.rs
index bbfa9f2783a7..c4fd0d8406a0 100644
--- a/datafusion/common/src/dfschema/fields_spans.rs
+++ b/datafusion/common/src/dfschema/fields_spans.rs
@@ -9,7 +9,7 @@ pub struct FieldsSpans(Vec<Vec<Span>>);
 
 impl FieldsSpans {
     pub fn empty(field_count: usize) -> Self {
-        Self((0..field_count).into_iter().map(|_| Vec::new()).collect())
+        Self((0..field_count).map(|_| Vec::new()).collect())
     }
 
     pub fn iter(&self) -> impl Iterator<Item = &Vec<Span>> {
@@ -20,7 +20,7 @@ impl FieldsSpans {
         &self,
         other: &FieldsSpans,
         join_type: &JoinType,
-        left_cols_len: usize,
+        _left_cols_len: usize,
     ) -> FieldsSpans {
         match join_type {
             JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
diff --git a/datafusion/common/src/dfschema/mod.rs b/datafusion/common/src/dfschema/mod.rs
index aadcfc423d12..eb07d2705c81 100644
--- a/datafusion/common/src/dfschema/mod.rs
+++ b/datafusion/common/src/dfschema/mod.rs
@@ -20,7 +20,6 @@
 mod fields_spans;
 pub use fields_spans::FieldsSpans;
 
-use std::backtrace::Backtrace;
 use std::collections::{BTreeSet, HashMap, HashSet};
 use std::fmt::{Display, Formatter};
 use std::hash::Hash;
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 861fff185c75..46387c982eba 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -28,7 +28,7 @@ use std::sync::Arc;
 
 use crate::diagnostic::Diagnostic;
 use crate::utils::quote_identifier;
-use crate::{Column, DFSchema, DiagnosticEntry, DiagnosticEntryKind, TableReference};
+use crate::{Column, DFSchema, TableReference};
 #[cfg(feature = "avro")]
 use apache_avro::Error as AvroError;
 use arrow::error::ArrowError;
@@ -514,8 +514,7 @@ impl DataFusionError {
                     if let Some(source) = self
                         .head
                         .source()
-                        .map(|source| source.downcast_ref::<DataFusionError>())
-                        .flatten()
+                        .and_then(|source| source.downcast_ref::<DataFusionError>())
                     {
                         self.head = source;
                     } else {
@@ -528,7 +527,9 @@ impl DataFusionError {
         DiagnosticsIterator { head: self }
     }
 
-    pub fn get_individual_errors(&self) -> impl Iterator<Item = &Self> + '_ {
+    pub fn get_individual_errors(
+        &self,
+    ) -> impl Iterator<Item = (Vec<Diagnostic>, &Self)> + '_ {
         fn contains_collection(err: &DataFusionError) -> bool {
             let mut head = err;
             loop {
@@ -538,8 +539,7 @@ impl DataFusionError {
 
                 if let Some(source) = head
                     .source()
-                    .map(|source| source.downcast_ref::<DataFusionError>())
-                    .flatten()
+                    .and_then(|source| source.downcast_ref::<DataFusionError>())
                 {
                     head = source;
                 } else {
@@ -549,29 +549,34 @@ impl DataFusionError {
         }
 
         struct IndividualErrorsIterator<'a> {
-            queue: Vec<&'a DataFusionError>,
+            queue: Vec<(&'a DataFusionError, Vec<Diagnostic>)>,
         }
 
         impl<'a> Iterator for IndividualErrorsIterator<'a> {
-            type Item = &'a DataFusionError;
+            type Item = (Vec<Diagnostic>, &'a DataFusionError);
 
             fn next(&mut self) -> Option<Self::Item> {
-                while let Some(err) = self.queue.pop() {
+                while let Some((err, mut diagnostics_prefix)) = self.queue.pop() {
                     if !contains_collection(err) {
-                        return Some(err);
+                        return Some((diagnostics_prefix, err));
                     }
 
                     if let DataFusionError::Collection(errs) = err {
-                        self.queue.extend(errs.iter());
+                        self.queue.extend(
+                            errs.iter().map(|err| (err, diagnostics_prefix.clone())),
+                        );
                         continue;
                     }
 
+                    if let DataFusionError::Diagnostic(diagnostics, _) = err {
+                        diagnostics_prefix.push(diagnostics.clone());
+                    }
+
                     if let Some(source) = err
                         .source()
-                        .map(|source| source.downcast_ref::<DataFusionError>())
-                        .flatten()
+                        .and_then(|source| source.downcast_ref::<DataFusionError>())
                     {
-                        self.queue.push(source);
+                        self.queue.push((source, diagnostics_prefix));
                     }
                 }
 
@@ -579,7 +584,9 @@ impl DataFusionError {
             }
         }
 
-        IndividualErrorsIterator { queue: vec![self] }
+        IndividualErrorsIterator {
+            queue: vec![(self, vec![])],
+        }
     }
 }
 
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index 72ce99f78d00..46dea48e1787 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -33,6 +33,7 @@ pub mod alias;
 pub mod cast;
 pub mod config;
 pub mod cse;
+pub mod diagnostic;
 pub mod display;
 pub mod error;
 pub mod file_options;
@@ -47,15 +48,15 @@ pub mod test_util;
 pub mod tree_node;
 pub mod types;
 pub mod utils;
-pub mod diagnostic;
 pub mod with_span;
 
 /// Reexport arrow crate
 pub use arrow;
 pub use column::Column;
 pub use dfschema::{
-    qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, FieldsSpans,
+    qualified_name, DFSchema, DFSchemaRef, ExprSchema, FieldsSpans, SchemaExt, ToDFSchema,
 };
+pub use diagnostic::{Diagnostic, DiagnosticEntry, DiagnosticEntryKind};
 pub use error::{
     field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError,
     SharedResult,
@@ -78,7 +79,6 @@ pub use stats::{ColumnStatistics, Statistics};
 pub use table_reference::{ResolvedTableReference, TableReference};
 pub use unnest::{RecursionUnnestOption, UnnestOptions};
 pub use utils::project_schema;
-pub use diagnostic::{Diagnostic, DiagnosticEntry, DiagnosticEntryKind};
 pub use with_span::WithSpans;
 
 // These are hidden from docs purely to avoid polluting the public view of what this crate exports.
diff --git a/datafusion/common/src/with_span.rs b/datafusion/common/src/with_span.rs
index d0a953b40578..d0e3c6d5e82b 100644
--- a/datafusion/common/src/with_span.rs
+++ b/datafusion/common/src/with_span.rs
@@ -1,5 +1,4 @@
 use std::{
-    cmp::Ordering,
     fmt::{self, Debug, Display},
     ops::{Deref, DerefMut},
 };
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index 847fd6339b22..40fbf6c272bd 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -34,7 +34,6 @@ use datafusion_common::{
     Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result, WithSpans,
 };
 use itertools::Itertools;
-use sqlparser::tokenizer::Span;
 
 /// The type signature of an instantiation of binary operator expression such as
 /// `lhs + rhs`
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 438662e0642b..fcaf96791952 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -56,6 +56,7 @@ serde_json = { workspace = true }
 sqlparser = { workspace = true }
 strum = { version = "0.26.1", features = ["derive"] }
 strum_macros = "0.26.0"
+derivative = { workspace = true }
 
 [dev-dependencies]
 ctor = { workspace = true }
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 5fd4faa06242..f2e5891feb4d 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -38,6 +38,7 @@ use datafusion_common::{
     plan_err, Column, DFSchema, HashMap, Result, ScalarValue, TableReference,
 };
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
+use derivative::Derivative;
 use sqlparser::ast::{
     display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
     NullTreatment, RenameSelectItem, ReplaceSelectElement,
@@ -401,11 +402,19 @@ impl Unnest {
 }
 
 /// Alias expression
-#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
+#[derive(Clone, Derivative, Debug)]
+#[derivative(PartialEq, Eq, PartialOrd, Hash)]
 pub struct Alias {
     pub expr: Box<Expr>,
     pub relation: Option<TableReference>,
     pub name: String,
+    #[derivative(
+        PartialEq = "ignore",
+        Hash = "ignore",
+        PartialOrd = "ignore",
+        Ord = "ignore"
+    )]
+    pub span: Span,
 }
 
 impl Alias {
@@ -419,8 +428,13 @@ impl Alias {
             expr: Box::new(expr),
             relation: relation.map(|r| r.into()),
             name: name.into(),
+            span: Span::empty(),
         }
     }
+
+    pub fn with_span(self, span: Span) -> Self {
+        Self { span, ..self }
+    }
 }
 
 /// Binary expression
@@ -1128,6 +1142,7 @@ impl Expr {
                 relation,
                 name,
                 spans: _,
+                ..
             }) => (relation.clone(), name.clone()),
             Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
             _ => (None, self.schema_name().to_string()),
@@ -1681,11 +1696,22 @@ impl Expr {
         }
     }
 
-    pub fn get_spans(&self) -> Option<&Vec<Span>> {
+    pub fn get_span(&self) -> Span {
         match self {
-            Expr::Column(Column { spans, .. }) => Some(spans),
-            Expr::Alias(Alias { expr, .. }) => expr.get_spans(),
-            _ => None,
+            Expr::Column(Column { spans, .. }) => match spans.as_slice() {
+                [] => panic!("No spans for column expr"),
+                [span] => *span,
+                _ => panic!("Column expr has more than one span"),
+            },
+            Expr::Alias(Alias {
+                expr,
+                span: alias_span,
+                ..
+            }) => {
+                let span = expr.get_span();
+                span.union(alias_span)
+            }
+            _ => Span::empty(),
         }
     }
 }
@@ -1701,6 +1727,7 @@ impl HashNode for Expr {
                 expr: _expr,
                 relation,
                 name,
+                ..
             }) => {
                 relation.hash(state);
                 name.hash(state);
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index 2c5c83999038..f2557fb11281 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -186,6 +186,7 @@ pub fn create_col_from_scalar_expr(
             relation: _,
             name,
             spans,
+            ..
         }) => Ok(
             Column::new(Some::<TableReference>(subqry_alias.into()), name)
                 .with_spans(spans.iter().copied()),
diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs
index 35d0fc2807db..258991914012 100644
--- a/datafusion/expr/src/expr_rewriter/order_by.rs
+++ b/datafusion/expr/src/expr_rewriter/order_by.rs
@@ -23,7 +23,6 @@ use crate::{expr::Sort, Cast, Expr, LogicalPlan, TryCast};
 
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_common::{Column, Result};
-use sqlparser::tokenizer::Span;
 
 /// Rewrite sort on aggregate expressions to sort on the column of aggregate output
 /// For example, `max(x)` is written to `col("max(x)")`
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index 6bb909a93111..0a28bc4707c0 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -33,6 +33,7 @@ use datafusion_common::{
 };
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use recursive::recursive;
+use sqlparser::tokenizer::Span;
 use std::collections::HashMap;
 use std::sync::Arc;
 
@@ -405,16 +406,16 @@ impl ExprSchemable for Expr {
             }) => {
                 let (left_type, left_is_nullable) =
                     left.data_type_and_nullable(schema)?;
-                let left_type = if let Some(spans) = left.get_spans() {
-                    WithSpans::new(&left_type, spans.iter().copied())
+                let left_type = if left.get_span() != Span::empty() {
+                    WithSpans::new(&left_type, [left.get_span()])
                 } else {
                     (&left_type).into()
                 };
 
                 let (right_type, right_is_nullable) =
                     right.data_type_and_nullable(schema)?;
-                let right_type = if let Some(spans) = right.get_spans() {
-                    WithSpans::new(&right_type, spans.iter().copied())
+                let right_type = if right.get_span() != Span::empty() {
+                    WithSpans::new(&right_type, [right.get_span()])
                 } else {
                     (&right_type).into()
                 };
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 445cf3dea3a6..75a0b3433089 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1543,14 +1543,20 @@ pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalP
                         Span::empty(),
                     ),
                     DiagnosticEntry::new(
-                        format!("This side has {} columns", left_plan.schema().fields().len()),
+                        format!(
+                            "This side has {} columns",
+                            left_plan.schema().fields().len()
+                        ),
                         DiagnosticEntryKind::Note,
                         Span::union_iter(
                             left_plan.schema().fields_spans().iter().flatten().copied(),
                         ),
                     ),
                     DiagnosticEntry::new(
-                        format!("This side has {} columns", right_plan.schema().fields().len()),
+                        format!(
+                            "This side has {} columns",
+                            right_plan.schema().fields().len()
+                        ),
                         DiagnosticEntryKind::Note,
                         Span::union_iter(
                             right_plan.schema().fields_spans().iter().flatten().copied(),
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index 1f1c8e7e9b52..37e327bcb8cf 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -2163,12 +2163,8 @@ impl Projection {
 /// produced by the projection operation. If the schema computation is successful,
 /// the `Result` will contain the schema; otherwise, it will contain an error.
 pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSchema>> {
-
     let metadata = input.schema().metadata().clone();
-    let fields_spans = exprs
-        .iter()
-        .map(|e| e.get_spans().cloned().unwrap_or_else(|| vec![]))
-        .collect();
+    let fields_spans = exprs.iter().map(|e| vec![e.get_span()]).collect();
 
     let schema =
         DFSchema::new_with_metadata(exprlist_to_fields(exprs, input)?, metadata)?
diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs
index eacace5ed046..a017953fd72d 100644
--- a/datafusion/expr/src/tree_node.rs
+++ b/datafusion/expr/src/tree_node.rs
@@ -127,7 +127,16 @@ impl TreeNode for Expr {
                 expr,
                 relation,
                 name,
-            }) => f(*expr)?.update_data(|e| e.alias_qualified(relation, name)),
+                span,
+                ..
+            }) => f(*expr)?.update_data(|e| {
+                let e = e.alias_qualified(relation, name);
+                if let Expr::Alias(alias) = e {
+                    Expr::Alias(alias.with_span(span))
+                } else {
+                    unreachable!();
+                }
+            }),
             Expr::InSubquery(InSubquery {
                 expr,
                 subquery,
diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
index 225b6098a9eb..bd883b2c2033 100644
--- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
+++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs
@@ -124,28 +124,23 @@ fn expand_exprlist(input: &LogicalPlan, expr: Vec<Expr>) -> Result<Vec<Expr>> {
                 ref relation,
                 ref name,
                 ref spans,
+                ..
             }) => {
                 if name.eq("*") {
                     let expanded_columns = if let Some(qualifier) = relation {
-                        expand_qualified_wildcard(
-                            qualifier,
-                            input.schema(),
-                            None,
-                        )?
+                        expand_qualified_wildcard(qualifier, input.schema(), None)?
                     } else {
-                        expand_wildcard(
-                            input.schema(),
-                            input,
-                            None,
-                        )?
+                        expand_wildcard(input.schema(), input, None)?
                     };
 
-                    projected_expr.extend(expanded_columns.into_iter().map(|mut expr| {
-                        if let Expr::Column(c) = &mut expr {
-                            c.spans = spans.clone();
-                        }
-                        expr
-                    }));
+                    projected_expr.extend(expanded_columns.into_iter().map(
+                        |mut expr| {
+                            if let Expr::Column(c) = &mut expr {
+                                c.spans = spans.clone();
+                            }
+                            expr
+                        },
+                    ));
                 } else {
                     projected_expr.push(e.clone());
                 }
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index b585beb1e7d5..904face71bec 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -1001,6 +1001,7 @@ fn project_with_column_index(
                 relation: _,
                 ref name,
                 spans: _,
+                ..
             }) if name != schema.field(i).name() => e.alias(schema.field(i).name()),
             Expr::Alias { .. } | Expr::Column { .. } => e,
             _ => e.alias(schema.field(i).name()),
diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs
index 5c519dce8fbb..050084964dba 100644
--- a/datafusion/optimizer/src/decorrelate.rs
+++ b/datafusion/optimizer/src/decorrelate.rs
@@ -532,7 +532,12 @@ fn proj_exprs_evaluation_result_on_empty_batch(
             let result_expr = simplifier.simplify(result_expr)?;
             let expr_name = match expr {
                 Expr::Alias(Alias { name, .. }) => name.to_string(),
-                Expr::Column(Column { relation: _, name, spans: _ }) => name.to_string(),
+                Expr::Column(Column {
+                    relation: _,
+                    name,
+                    spans: _,
+                    ..
+                }) => name.to_string(),
                 _ => expr.schema_name().to_string(),
             };
             expr_result_map_for_count_bug.insert(expr_name, result_expr);
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs
index 1519c54dbf68..81e60ffd9da5 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -493,6 +493,7 @@ fn merge_consecutive_projections(proj: Projection) -> Result<Transformed<Project
                 expr,
                 relation,
                 name,
+                ..
             }) => rewrite_expr(*expr, &prev_projection).map(|result| {
                 result.update_data(|expr| Expr::Alias(Alias::new(expr, relation, name)))
             }),
diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs
index 4787ba8d4ca7..992a926cd91f 100644
--- a/datafusion/optimizer/src/replace_distinct_aggregate.rs
+++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs
@@ -157,10 +157,15 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
                     .iter()
                     .skip(expr_cnt)
                     .zip(schema.iter())
-                    .map(|((new_qualifier, new_field, _), (old_qualifier, old_field, _))| {
-                        col(Column::from((new_qualifier, new_field)))
-                            .alias_qualified(old_qualifier.cloned(), old_field.name())
-                    })
+                    .map(
+                        |(
+                            (new_qualifier, new_field, _),
+                            (old_qualifier, old_field, _),
+                        )| {
+                            col(Column::from((new_qualifier, new_field)))
+                                .alias_qualified(old_qualifier.cloned(), old_field.name())
+                        },
+                    )
                     .collect::<Vec<Expr>>();
 
                 let plan = LogicalPlanBuilder::from(plan)
diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs
index 1b9583516ced..adad67031487 100644
--- a/datafusion/proto-common/src/to_proto/mod.rs
+++ b/datafusion/proto-common/src/to_proto/mod.rs
@@ -266,7 +266,7 @@ impl TryFrom<&DFSchema> for protobuf::DfSchema {
     fn try_from(s: &DFSchema) -> Result<Self, Self::Error> {
         let columns = s
             .iter()
-            .map(|(qualifier, field)| {
+            .map(|(qualifier, field, _)| {
                 Ok(protobuf::DfField {
                     field: Some(field.as_ref().try_into()?),
                     qualifier: qualifier.map(|r| protobuf::ColumnRelation {
diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs
index 735221eae6d5..7ee5bd45a058 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -20,8 +20,7 @@ use sqlparser::ast::{Expr as SQLExpr, Ident};
 
 use datafusion_common::{
     internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema,
-    DataFusionError, Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result,
-    TableReference,
+    DataFusionError, Result, TableReference,
 };
 use datafusion_expr::planner::PlannerResult;
 use datafusion_expr::{Case, Expr};
@@ -114,7 +113,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 })?;
             Ok(Expr::ScalarVariable(ty, var_names))
         } else {
-            let span = ids.last().map(|id| id.span).unwrap_or(Span::empty());
+            let span = Span::union_iter(ids.iter().map(|id| id.span));
             let ids = ids
                 .into_iter()
                 .map(|id| self.ident_normalizer.normalize(id))
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index c08a9fd44e04..aa4dbb37e117 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -157,7 +157,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                         }
                         _ => false,
                     }) {
-                        Some((qualifier, df_field, _)) => Expr::from((qualifier, df_field)),
+                        Some((qualifier, df_field, _)) => {
+                            Expr::from((qualifier, df_field))
+                        }
                         None => Expr::Column(col),
                     }
                 }
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 33b3b97044af..ad30fc3bbb5d 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -394,7 +394,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                                             col.name, r
                                         ),
                                         DiagnosticEntryKind::Error,
-                                        col.spans.first().copied().unwrap_or(Span::empty()),
+                                        col.spans
+                                            .first()
+                                            .copied()
+                                            .unwrap_or(Span::empty()),
                                     )])
                                 })
                             },
diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs
index 2ed1197e8fbf..faec947ce5d8 100644
--- a/datafusion/sql/src/relation/join.rs
+++ b/datafusion/sql/src/relation/join.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use datafusion_common::{not_impl_err, Column, Result};
-use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder};
+use datafusion_common::{not_impl_err, Column, DataFusionError, Result, ScalarValue};
+use datafusion_expr::{Expr, JoinType, LogicalPlan, LogicalPlanBuilder};
 use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableFactor, TableWithJoins};
 use std::collections::HashSet;
 
@@ -26,19 +26,21 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         &self,
         t: TableWithJoins,
         planner_context: &mut PlannerContext,
-    ) -> Result<LogicalPlan> {
+    ) -> Result<(LogicalPlan, Option<DataFusionError>)> {
         let mut left = if is_lateral(&t.relation) {
             self.create_relation_subquery(t.relation, planner_context)?
         } else {
             self.create_relation(t.relation, planner_context)?
         };
+        let mut ignorable_error = None;
         let old_outer_from_schema = planner_context.outer_from_schema();
         for join in t.joins {
             planner_context.extend_outer_from_schema(left.schema())?;
-            left = self.parse_relation_join(left, join, planner_context)?;
+            (left, ignorable_error) =
+                self.parse_relation_join(left, join, planner_context)?;
         }
         planner_context.set_outer_from_schema(old_outer_from_schema);
-        Ok(left)
+        Ok((left, ignorable_error))
     }
 
     fn parse_relation_join(
@@ -46,7 +48,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         left: LogicalPlan,
         join: Join,
         planner_context: &mut PlannerContext,
-    ) -> Result<LogicalPlan> {
+    ) -> Result<(LogicalPlan, Option<DataFusionError>)> {
         let right = if is_lateral_join(&join)? {
             self.create_relation_subquery(join.relation, planner_context)?
         } else {
@@ -93,7 +95,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             JoinOperator::FullOuter(constraint) => {
                 self.parse_join(left, right, constraint, JoinType::Full, planner_context)
             }
-            JoinOperator::CrossJoin => self.parse_cross_join(left, right),
+            JoinOperator::CrossJoin => {
+                let plan = self.parse_cross_join(left, right)?;
+                Ok((plan, None))
+            }
             other => not_impl_err!("Unsupported JOIN operator {other:?}"),
         }
     }
@@ -113,24 +118,32 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         constraint: JoinConstraint,
         join_type: JoinType,
         planner_context: &mut PlannerContext,
-    ) -> Result<LogicalPlan> {
+    ) -> Result<(LogicalPlan, Option<DataFusionError>)> {
         match constraint {
             JoinConstraint::On(sql_expr) => {
                 let join_schema = left.schema().join(right.schema())?;
                 // parse ON expression
-                let expr = self.sql_to_expr(sql_expr, &join_schema, planner_context)?;
-                LogicalPlanBuilder::from(left)
+                let (expr, ignorable_error) =
+                    match self.sql_to_expr(sql_expr, &join_schema, planner_context) {
+                        Ok(expr) => (expr, None),
+                        Err(e) => {
+                            (Expr::Literal(ScalarValue::Boolean(Some(false))), Some(e))
+                        }
+                    };
+                let plan = LogicalPlanBuilder::from(left)
                     .join_on(right, join_type, Some(expr))?
-                    .build()
+                    .build()?;
+                Ok((plan, ignorable_error))
             }
             JoinConstraint::Using(idents) => {
                 let keys: Vec<Column> = idents
                     .into_iter()
                     .map(|x| Column::from_name(self.ident_normalizer.normalize(x)))
                     .collect();
-                LogicalPlanBuilder::from(left)
+                let plan = LogicalPlanBuilder::from(left)
                     .join_using(right, join_type, keys)?
-                    .build()
+                    .build()?;
+                Ok((plan, None))
             }
             JoinConstraint::Natural => {
                 let left_cols: HashSet<&String> =
@@ -143,17 +156,21 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                     .filter(|f| left_cols.contains(f))
                     .map(Column::from_name)
                     .collect();
-                if keys.is_empty() {
-                    self.parse_cross_join(left, right)
+                let plan = if keys.is_empty() {
+                    self.parse_cross_join(left, right)?
                 } else {
                     LogicalPlanBuilder::from(left)
                         .join_using(right, join_type, keys)?
-                        .build()
-                }
+                        .build()?
+                };
+                Ok((plan, None))
+            }
+            JoinConstraint::None => {
+                let plan = LogicalPlanBuilder::from(left)
+                    .join_on(right, join_type, [])?
+                    .build()?;
+                Ok((plan, None))
             }
-            JoinConstraint::None => LogicalPlanBuilder::from(left)
-                .join_on(right, join_type, [])?
-                .build(),
         }
     }
 }
diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs
index 45a617daae96..56402aa0353a 100644
--- a/datafusion/sql/src/relation/mod.rs
+++ b/datafusion/sql/src/relation/mod.rs
@@ -100,7 +100,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 table_with_joins,
                 alias,
             } => (
-                self.plan_table_with_joins(*table_with_joins, planner_context)?,
+                {
+                    let (plan, ignorable_error) =
+                        self.plan_table_with_joins(*table_with_joins, planner_context)?;
+                    if let Some(e) = ignorable_error {
+                        return Err(e);
+                    }
+                    plan
+                },
                 alias,
             ),
             TableFactor::UNNEST {
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index b9ffce0b2af6..af3ed62ed7a8 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -27,7 +27,8 @@ use crate::utils::{
 
 use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
 use datafusion_common::{
-    not_impl_err, plan_err, Diagnostic, DiagnosticEntry, DiagnosticEntryKind, Result,
+    not_impl_err, plan_err, DataFusionError, Diagnostic, DiagnosticEntry,
+    DiagnosticEntryKind, Result,
 };
 use datafusion_common::{RecursionUnnestOption, UnnestOptions};
 use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions};
@@ -75,8 +76,17 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             return not_impl_err!("SORT BY");
         }
 
+        let mut errs = vec![];
+
         // Process `from` clause
-        let plan = self.plan_from_tables(select.from, planner_context)?;
+        let plan = {
+            let (plan, ignorable_error) =
+                self.plan_from_tables(select.from, planner_context)?;
+            if let Some(err) = ignorable_error {
+                errs.push(err);
+            }
+            plan
+        };
         let empty_from = matches!(plan, LogicalPlan::EmptyRelation(_));
 
         // Process `where` clause
@@ -198,13 +208,21 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             .is_empty()
             || !aggr_exprs.is_empty()
         {
-            self.aggregate(
+            match self.aggregate(
                 &base_plan,
                 &select_exprs,
                 having_expr_opt.as_ref(),
                 &group_by_exprs,
                 &aggr_exprs,
-            )?
+            ) {
+                Err(err) => {
+                    errs.push(err);
+                    (base_plan.clone(), select_exprs.clone(), having_expr_opt)
+                }
+                Ok((plan, select_exprs_post_aggr, having_expr_post_aggr)) => {
+                    (plan, select_exprs_post_aggr, having_expr_post_aggr)
+                }
+            }
         } else {
             match having_expr_opt {
                 Some(having_expr) => return plan_err!("HAVING clause references: {having_expr} must appear in the GROUP BY clause or be used in an aggregate function"),
@@ -288,7 +306,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             plan
         };
 
-        self.order_by(plan, order_by_rex)
+        let plan = self.order_by(plan, order_by_rex)?;
+
+        if !errs.is_empty() {
+            Err(DataFusionError::Collection(errs))
+        } else {
+            Ok(plan)
+        }
     }
 
     /// Try converting Expr(Unnest(Expr)) to Projection/Unnest/Projection
@@ -537,9 +561,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         &self,
         mut from: Vec<TableWithJoins>,
         planner_context: &mut PlannerContext,
-    ) -> Result<LogicalPlan> {
+    ) -> Result<(LogicalPlan, Option<DataFusionError>)> {
         match from.len() {
-            0 => Ok(LogicalPlanBuilder::empty(true).build()?),
+            0 => {
+                let plan = LogicalPlanBuilder::empty(true).build()?;
+                Ok((plan, None))
+            }
             1 => {
                 let input = from.remove(0);
                 self.plan_table_with_joins(input, planner_context)
@@ -547,24 +574,49 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             _ => {
                 let mut from = from.into_iter();
 
-                let mut left = LogicalPlanBuilder::from({
-                    let input = from.next().unwrap();
-                    self.plan_table_with_joins(input, planner_context)?
-                });
+                let mut ignorable_error = None;
+                let mut add_ignorable_error = |new_ignorable_err: Option<
+                    DataFusionError,
+                >| match new_ignorable_err {
+                    None => (),
+                    Some(new_ignorable_err) => {
+                        if ignorable_error.is_none() {
+                            ignorable_error = Some(DataFusionError::Collection(vec![]));
+                        }
+                        if let Some(DataFusionError::Collection(ignorable_errors)) =
+                            &mut ignorable_error
+                        {
+                            ignorable_errors.push(new_ignorable_err);
+                        }
+                    }
+                };
+
+                let left = {
+                    let (left, ignorable_error) = self
+                        .plan_table_with_joins(from.next().unwrap(), planner_context)?;
+                    add_ignorable_error(ignorable_error);
+                    left
+                };
+                let mut left = LogicalPlanBuilder::from(left);
                 let old_outer_from_schema = {
                     let left_schema = Some(Arc::clone(left.schema()));
                     planner_context.set_outer_from_schema(left_schema)
                 };
                 for input in from {
                     // Join `input` with the current result (`left`).
-                    let right = self.plan_table_with_joins(input, planner_context)?;
+                    let right = {
+                        let (right, ignorable_error) =
+                            self.plan_table_with_joins(input, planner_context)?;
+                        add_ignorable_error(ignorable_error);
+                        right
+                    };
                     left = left.cross_join(right)?;
                     // Update the outer FROM schema.
                     let left_schema = Some(Arc::clone(left.schema()));
                     planner_context.set_outer_from_schema(left_schema);
                 }
                 planner_context.set_outer_from_schema(old_outer_from_schema);
-                left.build()
+                Ok((left.build()?, ignorable_error))
             }
         }
     }
@@ -606,6 +658,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 Ok(vec![col])
             }
             SelectItem::ExprWithAlias { expr, alias } => {
+                let alias_span = alias.span;
                 let select_expr =
                     self.sql_to_expr(expr, plan.schema(), planner_context)?;
                 let col = normalize_col_with_schemas_and_ambiguity_check(
@@ -617,7 +670,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 // avoiding adding an alias if the column name is the same.
                 let expr = match &col {
                     Expr::Column(column) if column.name.eq(&name) => col,
-                    _ => col.alias(name),
+                    _ => {
+                        let expr = col.alias(name);
+                        if let Expr::Alias(alias) = expr {
+                            Expr::Alias(alias.with_span(alias_span))
+                        } else {
+                            unreachable!();
+                        }
+                    }
                 };
                 Ok(vec![expr])
             }
@@ -815,11 +875,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 Diagnostic::new([DiagnosticEntry::new(
                     "GROUP BY clause is here",
                     DiagnosticEntryKind::Note,
-                    Span::union_iter(
-                        group_by_exprs
-                            .iter()
-                            .flat_map(|e| e.get_spans().cloned().unwrap_or_default()),
-                    ),
+                    Span::union_iter(group_by_exprs.iter().map(|e| e.get_span())),
                 )])
             })
         })?;
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 572ac8221b1c..330fdfc39e83 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1714,7 +1714,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         // Build scan, join with from table if it exists.
         let mut input_tables = vec![table];
         input_tables.extend(from);
-        let scan = self.plan_from_tables(input_tables, &mut planner_context)?;
+        let scan = {
+            let (scan, ignorable_error) =
+                self.plan_from_tables(input_tables, &mut planner_context)?;
+            if let Some(ignorable_error) = ignorable_error {
+                return Err(ignorable_error);
+            }
+            scan
+        };
 
         // Filter
         let source = match predicate_expr {
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 04b8a99eacc9..b4ada73143e4 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -924,11 +924,13 @@ impl Unparser<'_> {
                         relation: _,
                         name: left_name,
                         spans: _,
+                        ..
                     }),
                     Expr::Column(Column {
                         relation: _,
                         name: right_name,
                         spans: _,
+                        ..
                     }),
                 ) if left_name == right_name => {
                     idents.push(self.new_ident_quoted_if_needs(left_name.to_string()));
diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs
index 68af121a4117..4dc56beb5adc 100644
--- a/datafusion/sql/src/unparser/rewrite.rs
+++ b/datafusion/sql/src/unparser/rewrite.rs
@@ -311,6 +311,7 @@ pub(super) fn inject_column_aliases(
                 expr: Box::new(expr.clone()),
                 relation,
                 name: col_alias.value,
+                span: col_alias.span,
             })
         })
         .collect::<Vec<_>>();
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 432d04510c6a..fa4ba20c4676 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -123,20 +123,25 @@ pub(crate) fn check_columns_satisfy_exprs(
         Expr::Column(_) => Ok(()),
         _ => internal_err!("Expr::Column are required"),
     })?;
+
     let mut errs = vec![];
     let column_exprs = find_column_exprs(exprs);
     for e in &column_exprs {
         match e {
             Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
                 for e in exprs {
-                    if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                    if let Err(err) =
+                        check_column_satisfies_expr(columns, e, call_purpose)
+                    {
                         errs.push(err);
                     }
                 }
             }
             Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
                 for e in exprs {
-                    if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                    if let Err(err) =
+                        check_column_satisfies_expr(columns, e, call_purpose)
+                    {
                         errs.push(err);
                     }
                 }
@@ -144,15 +149,19 @@ pub(crate) fn check_columns_satisfy_exprs(
             Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
                 for exprs in lists_of_exprs {
                     for e in exprs {
-                        if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                        if let Err(err) =
+                            check_column_satisfies_expr(columns, e, call_purpose)
+                        {
                             errs.push(err);
                         }
                     }
                 }
             }
-            _ => if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
-                errs.push(err);
-            },
+            _ => {
+                if let Err(err) = check_column_satisfies_expr(columns, e, call_purpose) {
+                    errs.push(err);
+                }
+            }
         }
     }
     if !errs.is_empty() {
@@ -177,11 +186,11 @@ fn check_column_satisfies_expr(
             let message = match call_purpose {
                 CheckColumnsSatisfyExprsPurpose::GroupBy => format!(
                     "'{}' in projection does not appear in GROUP BY clause",
-                    expr.to_string()
+                    expr
                 ),
                 CheckColumnsSatisfyExprsPurpose::Having => format!(
                     "'{}' in HAVING clause does not appear in GROUP BY clause",
-                    expr.to_string()
+                    expr
                 ),
             };
             err.with_diagnostic(|_| {
@@ -189,12 +198,10 @@ fn check_column_satisfies_expr(
                     DiagnosticEntry::new(
                         message,
                         DiagnosticEntryKind::Error,
-                        expr.get_spans()
-                            .and_then(|spans| spans.first().copied())
-                            .unwrap_or(Span::empty()),
+                        expr.get_span(),
                     ),
                     DiagnosticEntry::new(
-                        format!("Add '{}' to GROUP BY clause", expr.to_string()),
+                        format!("Add '{}' to GROUP BY clause", expr),
                         DiagnosticEntryKind::Help,
                         Span::empty(),
                     ),