Skip to content

Commit

Permalink
break examples out to tests
Browse files Browse the repository at this point in the history
  • Loading branch information
crisptrutski committed Oct 21, 2024
1 parent 64c0f58 commit 4526e2b
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 79 deletions.
103 changes: 24 additions & 79 deletions src/macaw/scope_experiments.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
(instance? Table node) [:table (.getName node)]
:else [(type node) node]))

(defn- semantic-map [sql]
(defn semantic-map
"Name is a bit of a shame, for now this is a fairly low level representation of how we walk the query"
[sql]
(mw/fold-query (m/parsed-query sql)
{:every-node (fn [acc node ctx]
(let [id (m/scope-id (first ctx))
Expand All @@ -31,76 +33,33 @@
(update-in [:children parent-id] (fnil conj #{}) id))
acc')))
(update :sequence (fnil conj []) [id node]))))}
{:scopes {} ;; id -> {:path [labels], :children [nodes]}
:parents {} ;; what scope is this inside?
:children {} ;; what scopes are inside?
:sequence []})) ;; [scope-id, node]
{:scopes {} ;; id -> {:path [labels], :children [nodes]}
:parents {} ;; what scope is this inside?
:children {} ;; what scopes are inside?
:sequence []})) ;; [scope-id, node]

(comment
(semantic-map "select x from t, u, v left join w on w.id = v.id where t.id = u.id and u.id = v.id limit 3")
;{:scopes {1 {:path ["SELECT"], :children [[:column "x"]]},
; 2 {:path ["SELECT" "FROM"], :children [[:table "t"]]},
; 4 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "u"]]},
; 5 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "v"]]},
; 6 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "w"]]},
; 3 {:path ["SELECT" "JOIN"], :children [[:column "id"] [:table "w"] [:column "id"] [:table "v"]]},
; 7 {:path ["SELECT" "WHERE"],
; :children [[:column "id"]
; [:table "t"]
; [:column "id"]
; [:table "u"]
; [:column "id"]
; [:table "u"]
; [:column "id"]
; [:table "v"]]}},
; :parents {2 1, 4 3, 5 3, 6 3, 3 1, 7 1},
; :children {1 #{7 3 2}, 3 #{4 6 5}},
; :sequence [[1 [:column "x"]]
; [2 [:table "t"]]
; [4 [:table "u"]]
; [5 [:table "v"]]
; [6 [:table "w"]]
; [3 [:column "id"]]
; [3 [:table "w"]]
; [3 [:column "id"]]
; [3 [:table "v"]]
; [7 [:column "id"]]
; [7 [:table "t"]]
; [7 [:column "id"]]
; [7 [:table "u"]]
; [7 [:column "id"]]
; [7 [:table "u"]]
; [7 [:column "id"]]
; [7 [:table "v"]]]}
(defn- ->descendants
  "Given a direct parent->children mapping, get back the transitive mapping.

  Returns a map with the same keys as `parent->children`, where each key maps
  to the set of every id reachable beneath it (children, grandchildren, ...).
  Ids that never occur as a key (i.e. leaves) get no entry of their own.

  The result does not depend on how the ids are numbered or ordered: each
  child is fully resolved (depth-first, memoised in the accumulator) before
  its parent is assembled. The mapping is expected to be acyclic, as a scope
  tree is."
  [parent->children]
  (letfn [(expand [acc id]
            (if (or (contains? acc id)                     ; already resolved
                    (not (contains? parent->children id))) ; leaf, nothing to record
              acc
              (let [children (parent->children id)
                    ;; resolve every child first so its full descendant set
                    ;; is available when we assemble this parent's set
                    acc      (reduce expand acc children)]
                ;; leaves are absent from acc, so (acc leaf) is nil and
                ;; contributes nothing to the mapcat
                (assoc acc id (into (set children) (mapcat acc) children)))))]
    (reduce expand {} (keys parent->children))))


(semantic-map "select t.a,b,c,d from t")
;{:scopes {1 {:path ["select"], :children [[:column "a"] [:column "b"] [:column "c"] [:column "d"]]},
; 2 {:path ["select" "from"], :children [[:table "t"]]}},
; :parents {2 1},
; :children {1 #{2}},
; :sequence [[1 [:column "a"]] [1 [:column "b"]] [1 [:column "c"]] [1 [:column "d"]] [2 [:table "t"]]]}
)

(defn- get-descendants-map
  "Expand a direct parent->children map into a parent->all-descendants map.
  Every key of `parent-children-map` is mapped to the set containing its
  children together with everything found by recursively following them."
  [parent-children-map]
  (let [descendants-of (fn descendants-of [node]
                         ;; nodes without an entry have no children
                         (let [kids   (get parent-children-map node [])
                               deeper (mapcat descendants-of kids)]
                           (set (concat kids deeper))))]
    (reduce (fn [result node]
              (assoc result node (descendants-of node)))
            {}
            (keys parent-children-map))))

(defn fields->tables-in-scope [sql]
(defn fields->tables-in-scope
"Build a map of each to field to all the tables that are in scope when its referenced"
[sql]
(let [sm (semantic-map sql)
tables (filter (comp #{:table} first second) (:sequence sm))
scope->tables (reduce
(fn [m [scope-id [_ table-name]]]
(update m scope-id (fnil conj #{}) table-name))
{}
tables)
scope->descendants (get-descendants-map (:children sm))
scope->descendants (->descendants (:children sm))
scope->nested-tables (reduce
(fn [m parent-id]
(assoc m parent-id
Expand All @@ -115,25 +74,11 @@
#{table-name}
(scope->nested-tables scope-id))])))))

(defn- fields-to-search [f->ts]
(defn fields-to-search
  "Flatten a field->candidate-tables map into a sorted set of qualified columns.
  Each entry of `f->ts` pairs a field (whose second element is read as the
  column name) with the table names it could belong to; one
  [:table <table> :column <column>] vector is emitted per candidate table, so
  an ambiguous field yields every possibility."
  [f->ts]
  (into (sorted-set)
        (for [[[_ column-name] table-names] f->ts
              table-name                    table-names]
          [:table table-name :column column-name])))

(comment
;; like source-columns, but understands scope
(fields-to-search
(fields->tables-in-scope "select x from t, u, v left join w on w.a = v.a where t.b = u.b and u.c = v.c limit 3"))
;#{[:table "t" :column "b"]
; [:table "t" :column "x"]
; [:table "u" :column "b"]
; [:table "u" :column "c"]
; [:table "u" :column "x"]
; [:table "v" :column "a"]
; [:table "v" :column "c"]
; [:table "v" :column "x"]
; [:table "w" :column "a"]
; [:table "w" :column "x"]}
)
69 changes: 69 additions & 0 deletions test/macaw/scope_experiments.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
(ns macaw.scope-experiments
(:require
[clojure.test :refer :all]
[macaw.scope-experiments :as mse]))

(deftest semantic-map-test
  ;; Pins the exact structure returned by mse/semantic-map for two queries:
  ;; the scopes keyed by id, the parent/child links between scope ids, and
  ;; the flat [scope-id node] sequence.
  (testing "implicit joins, an explicit left join and a where clause"
    (let [sql      "select x from t, u, v left join w on w.id = v.id where t.id = u.id and u.id = v.id limit 3"
          expected {:scopes   {1 {:path ["SELECT"], :children [[:column nil "x"]]}
                               2 {:path ["SELECT" "FROM"], :children [[:table "t"]]}
                               4 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "u"]]}
                               5 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "v"]]}
                               6 {:path ["SELECT" "JOIN" "FROM"], :children [[:table "w"]]}
                               3 {:path     ["SELECT" "JOIN"]
                                  :children [[:column "w" "id"]
                                             [:table "w"]
                                             [:column "v" "id"]
                                             [:table "v"]]}
                               7 {:path     ["SELECT" "WHERE"]
                                  :children [[:column "t" "id"]
                                             [:table "t"]
                                             [:column "u" "id"]
                                             [:table "u"]
                                             [:column "u" "id"]
                                             [:table "u"]
                                             [:column "v" "id"]
                                             [:table "v"]]}}
                    :parents  {2 1, 4 3, 5 3, 6 3, 3 1, 7 1}
                    :children {1 #{7 3 2}, 3 #{4 6 5}}
                    :sequence [[1 [:column nil "x"]]
                               [2 [:table "t"]]
                               [4 [:table "u"]]
                               [5 [:table "v"]]
                               [6 [:table "w"]]
                               [3 [:column "w" "id"]]
                               [3 [:table "w"]]
                               [3 [:column "v" "id"]]
                               [3 [:table "v"]]
                               [7 [:column "t" "id"]]
                               [7 [:table "t"]]
                               [7 [:column "u" "id"]]
                               [7 [:table "u"]]
                               [7 [:column "u" "id"]]
                               [7 [:table "u"]]
                               [7 [:column "v" "id"]]
                               [7 [:table "v"]]]}]
      (is (= (mse/semantic-map sql) expected))))

  (testing "a mix of qualified and unqualified columns from one table"
    (let [sql      "select t.a,b,c,d from t"
          expected {:scopes   {1 {:path     ["SELECT"]
                                  :children [[:column "t" "a"]
                                             [:table "t"]
                                             [:column nil "b"]
                                             [:column nil "c"]
                                             [:column nil "d"]]}
                               2 {:path ["SELECT" "FROM"], :children [[:table "t"]]}}
                    :parents  {2 1}
                    :children {1 #{2}}
                    :sequence [[1 [:column "t" "a"]]
                               [1 [:table "t"]]
                               [1 [:column nil "b"]]
                               [1 [:column nil "c"]]
                               [1 [:column nil "d"]]
                               [2 [:table "t"]]]}]
      (is (= (mse/semantic-map sql) expected)))))

(deftest fields-to-search-test
  ;; like source-columns, but understands scope: a column that is not
  ;; qualified in the query ("x") is expected once per table it could
  ;; belong to.
  (testing "qualified columns enumerated for a query with joins"
    (let [sql      "select x from t, u, v left join w on w.a = v.a where t.b = u.b and u.c = v.c limit 3"
          expected #{[:table "t" :column "b"]
                     [:table "t" :column "x"]
                     [:table "u" :column "b"]
                     [:table "u" :column "c"]
                     [:table "u" :column "x"]
                     [:table "v" :column "a"]
                     [:table "v" :column "c"]
                     [:table "v" :column "x"]
                     [:table "w" :column "a"]
                     [:table "w" :column "x"]}
          actual   (-> sql
                       mse/fields->tables-in-scope
                       mse/fields-to-search)]
      (is (= actual expected)))))

0 comments on commit 4526e2b

Please sign in to comment.