Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Table extraction for basic queries #105

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions java/com/metabase/macaw/BasicTableExtractor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package com.metabase.macaw;

import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.Statement;
import net.sf.jsqlparser.statement.select.*;

import java.util.*;
import java.util.function.Consumer;

/**
* Return a simplified query representation we can work with further, if possible.
*/
@SuppressWarnings({
"PatternVariableCanBeUsed", "IfCanBeSwitch"} // don't force a newer JVM version
)
public final class BasicTableExtractor {

public enum ErrorType {
UNSUPPORTED_EXPRESSION,
INVALID_QUERY,
UNABLE_TO_PARSE
}

public static class AnalysisError extends RuntimeException {
public final ErrorType errorType;
public final Throwable cause;

public AnalysisError(ErrorType errorType) {
this.errorType = errorType;
this.cause = null;
}

public AnalysisError(ErrorType errorType, Throwable cause) {
this.errorType = errorType;
this.cause = cause;
}
}

public static Set<Table> getTables(Statement statement) {
try {
if (statement instanceof Select) {
return getTables((Select) statement);
}
// This is not a query, it's probably a statement.
throw new AnalysisError(ErrorType.INVALID_QUERY);
} catch (IllegalArgumentException e) {
// This query uses features that we do not yet support translating.
throw new AnalysisError(ErrorType.UNABLE_TO_PARSE, e);
}
}

private static Set<Table> getTables(Select select) {
if (select instanceof PlainSelect) {
return getTables(select.getPlainSelect());
} else {
// We don't support more complex kinds of select statements yet.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
}

private static Set<Table> getTables(PlainSelect select) {
// these are fine, but irrelevant
//
// - select.getDistinct()
// - select.getHaving()
// - select.getKsqlWindow()
// - select.getMySqlHintStraightJoin()
// - select.getMySqlSqlCacheFlag()
// - select.getLimitBy()
// - select.getOffset()
// - select.getOptimizeFor()
// - select.getOracleHint()
// - select.getTop()
// - select.getWait()

// Not currently parseable
if (select.getLateralViews() != null ||
select.getOracleHierarchical() != null ||
select.getWindowDefinitions() != null) {
throw new AnalysisError(ErrorType.UNABLE_TO_PARSE);
}

// Not currently supported
if (select.getWithItemsList() != null) {
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}

// any of these - nope out
if (select.getFetch() != null ||
select.getFirst() != null ||
select.getForClause() != null ||
select.getForMode() != null ||
select.getForUpdateTable() != null ||
select.getForXmlPath() != null ||
select.getIntoTables() != null ||
select.getIsolation() != null ||
select.getSkip() != null) {
throw new AnalysisError(ErrorType.INVALID_QUERY);
}

Set<Table> tables = new HashSet<>();

for (SelectItem item : select.getSelectItems()) {
if (item.getExpression() instanceof ParenthesedSelect) {
// Do not allow sub-selects.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
}

Consumer<FromItem> pushOrThrow = (FromItem item) -> {
if (item instanceof Table) {
Table table = (Table) item;
if (table.getName().contains("*")) {
// Do not allow table wildcards.
throw new AnalysisError(ErrorType.INVALID_QUERY);
}
tables.add(table);
} else if (item instanceof TableFunction) {
// Do not allow dynamic tables
throw new AnalysisError(ErrorType.INVALID_QUERY);
} else {
// Only allow simple table references.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
};

if (select.getFromItem() != null) {
pushOrThrow.accept(select.getFromItem());
List<Join> joins = select.getJoins();
if (joins != null) {
joins.stream().map(Join::getFromItem).forEach(pushOrThrow);
}
}
return tables;
}

}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very clean

32 changes: 28 additions & 4 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
[macaw.collect :as collect]
[macaw.rewrite :as rewrite])
(:import
(com.metabase.macaw AstWalker$Scope)
(com.metabase.macaw AstWalker$Scope BasicTableExtractor BasicTableExtractor$AnalysisError)
(java.util.function Consumer)
(net.sf.jsqlparser JSQLParserException)
(net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil)
(net.sf.jsqlparser.parser.feature Feature)))
(net.sf.jsqlparser.parser.feature Feature)
(net.sf.jsqlparser.schema Table)))

(set! *warn-on-reflection* true)

Expand Down Expand Up @@ -83,15 +85,37 @@
(defn- raw-components [xs]
(into (empty xs) (keep :component) xs))

(defn- table->identifier
"Given a table object, return a map with the schema and table names."
[^Table t]
(if (.getSchemaName t)
{:schema (.getSchemaName t)
:table (.getName t)}
{:table (.getName t)}))

(defn- ->macaw-error [^BasicTableExtractor$AnalysisError analysis-error]
(keyword "macaw.error" (-> (.-errorType analysis-error)
str/lower-case
(str/replace #"_" "-"))))

(defmacro ^:private kw-or-tables [expr]
`(try (map table->identifier ~expr)
(catch BasicTableExtractor$AnalysisError e#
(->macaw-error e#))
(catch JSQLParserException _e#
:macaw.error/unable-to-parse)))

(defn query->tables
"Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it."
[sql & {:keys [mode] :as opts}]
(case mode
:ast-walker-1 (-> (parsed-query sql)
:ast-walker-1 (-> (parsed-query sql opts)
(query->components opts)
:tables
raw-components)
:basic-select :macaw.error/not-implemented))
:basic-select (->> (parsed-query sql opts)
(BasicTableExtractor/getTables)
kw-or-tables)))

(defn replace-names
"Given an SQL query, apply the given table, column, and schema renames.
Expand Down
20 changes: 14 additions & 6 deletions test/macaw/acceptance_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
:basic-select})

(def global-overrides
{:basic-select :macaw.error/not-implemented})
{})

(def ns-overrides
{:basic-select {"compound" :macaw.error/unsupported-expression
"mutation" :macaw.error/invalid-query
"dynamic" :macaw.error/invalid-query}})

(def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql")

Expand Down Expand Up @@ -71,8 +76,9 @@
(when (keyword? x)
x))

(defn- get-override [expected-cs mode ck]
(defn- get-override [expected-cs mode fixture ck]
(or (get global-overrides mode)
(get-in ns-overrides [mode (namespace fixture)])
(get-in expected-cs [:overrides mode :error])
(get-in expected-cs [:overrides :error])
(get-in expected-cs [:overrides mode ck])
Expand Down Expand Up @@ -104,16 +110,17 @@
(doseq [[ck cv] (dissoc expected-cs :overrides :error)]
(testing (str prefix " analysis is correct: " (name ck))
(let [actual-cv (get-component cs ck)
override (get-override expected-cs m ck)]
override (get-override expected-cs m fixture ck)]
(validate-analysis cv override actual-cv))))))
;; Testing path for newer modes.
(let [correct (:error expected-cs (:tables expected-cs))
override (get-override expected-cs m :tables)
override (get-override expected-cs m fixture :tables)
;; For now, we only support (and test) :tables
tables (testing (str prefix " table analysis does not throw for mode " m)
(is (ct/tables sql opts)))]
(testing (str prefix " table analysis is correct for mode " m)
(validate-analysis correct override tables)))))
(when-not (and (nil? correct) (nil? override))
(testing (str prefix " table analysis is correct for mode " m)
(validate-analysis correct override tables))))))

(when renames
(let [broken? (:broken? renames)
Expand Down Expand Up @@ -178,6 +185,7 @@
(str/trim
(ct/query-fixture fixture))))))

(test-fixture :dynamic/generate-series)

(test-fixture :compound/cte)
(test-fixture :compound/cte-nonambiguous)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
:overrides
{:basic-select
;; do not allow wildcard selects
{:error :macaw.error/illegal-expression}
{:error :macaw.error/invalid-query}

;; Just plain old wacky
:source-columns [{:table "`project_id.dataset_id.table_*`", :column "_TABLE_SUFFIX"}]
Expand Down
4 changes: 3 additions & 1 deletion test/resources/acceptance/no_source_columns.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{:tables #{{:table "foo"}
{:table "bar"}}
:source-columns #{}}
:source-columns #{}
:overrides
{:basic-select {:error :macaw.error/unsupported-expression}}}
33 changes: 28 additions & 5 deletions test/resources/acceptance/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ FROM employees
WHERE salary > 60000
GROUP BY department;

-- FIXTURE: cycle/cte
-- FIXTURE: compound/cycle-cte
-- we eventually want to check that the fields are cycled twice in the attribution of the final outputs.
WITH b AS (
SELECT
Expand All @@ -182,7 +182,7 @@ SELECT
z
FROM c;

-- FIXTURE: duplicate-scopes
-- FIXTURE: compound/duplicate-scopes
-- This is a minimal example to illustrate why we need to put an id on each scope.
-- As we add more complex compound query tests this will become redundant, and we can then delete it
-- we eventually want to check that the fields are cycled twice in the attribution of the final outputs.
Expand All @@ -193,7 +193,7 @@ SELECT
c.x
FROM b, c;

-- FIXTURE: generate-series
-- FIXTURE: dynamic/generate-series
SELECT t.day::date AS date
FROM generate_series(timestamp '2021-01-01', now(), interval '1 day') AS t(day)

Expand Down Expand Up @@ -225,7 +225,7 @@ with final as (

select * from final

-- FIXTURE: shadow/subselect
-- FIXTURE: compound/shadow-subselect
SELECT
e.id,
e.name,
Expand All @@ -240,7 +240,7 @@ FROM (
GROUP BY first_name, last_name, department_id
) e JOIN departments d ON d.id = e.department_id;

-- FIXTURE: simple/select-into
-- FIXTURE: mutation/select-into
SELECT id, name
INTO new_user_summary
FROM user;
Expand Down Expand Up @@ -294,3 +294,26 @@ EXEC sp_executesql @SQL

-- FIXTURE: string-concat
SELECT x || y AS z FROM t

-- FIXTURE: mutation/alter-table
ALTER TABLE users
ADD COLUMN email VARCHAR(255);

-- FIXTURE: mutation/drop-table
DROP TABLE IF EXISTS users;

-- FIXTURE: mutation/truncate-table
TRUNCATE TABLE users;

-- FIXTURE: mutation/insert
INSERT INTO users (name, email)
VALUES ('Alice', '[email protected]');

-- FIXTURE: mutation/update
UPDATE users
SET email = '[email protected]'
WHERE name = 'Alice';

-- FIXTURE: mutation/delete
DELETE FROM users
WHERE name = 'Alice';
4 changes: 3 additions & 1 deletion test/resources/acceptance/reserved__final.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{:source-columns [{:table "invoice" :column "amount_paid_cents"}
{:table "invoice" :column "id"}
{:table "invoice" :column "is_deleted"}]
:tables [{:table "invoice"}]}
:tables [{:table "invoice"}]
:overrides
{:basic-select :macaw.error/unsupported-expression}}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{:has-wildcard? true
{:has-wildcard? #{true}
:source-columns []
:tables [{:table "t"}]}
4 changes: 2 additions & 2 deletions test/resources/acceptance/sqlserver__execute.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{:error :macaw.error/illegal-expression
:overrides :macaw.error/unable-to-parse}
{:error :macaw.error/invalid-query
:overrides {:ast-walker-1 :macaw.error/unable-to-parse}}
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{:error :macaw.error/illegal-expression
:overrides :macaw.error/unable-to-parse}
{:error :macaw.error/invalid-query
:overrides {:ast-walker-1 :macaw.error/unable-to-parse}}
Loading