From eff3c2f253be1a0356b30398985b3f2cb5f7f672 Mon Sep 17 00:00:00 2001 From: Dave Bauman Date: Fri, 14 Jun 2024 10:41:28 -0400 Subject: [PATCH 1/4] feat: Apply row limit transform to query in backend --- .../server/datasources/query_execution.py | 11 ++++++- querybook/server/logic/admin.py | 12 +++++++ querybook/server/tasks/run_datadoc.py | 11 +++++++ .../DataDocQueryCell/DataDocQueryCell.tsx | 5 ++- .../QueryComposer/QueryComposer.tsx | 8 +++-- .../components/QueryComposer/RunQuery.tsx | 32 +++++++------------ querybook/webapp/redux/dataDoc/action.ts | 3 ++ .../webapp/redux/queryExecutions/action.ts | 2 ++ querybook/webapp/resource/queryExecution.ts | 8 ++++- 9 files changed, 67 insertions(+), 25 deletions(-) diff --git a/querybook/server/datasources/query_execution.py b/querybook/server/datasources/query_execution.py index 8c3515cdb..42b30d792 100644 --- a/querybook/server/datasources/query_execution.py +++ b/querybook/server/datasources/query_execution.py @@ -13,6 +13,7 @@ ) from clients.common import FileDoesNotExist from lib.export.all_exporters import ALL_EXPORTERS, get_exporter +from lib.query_analysis.transform import transform_to_limited_query from lib.result_store import GenericReader from lib.query_analysis.templating import ( QueryTemplatingError, @@ -61,10 +62,18 @@ @register("/query_execution/", methods=["POST"]) -def create_query_execution(query, engine_id, data_cell_id=None, originator=None): +def create_query_execution( + query, engine_id, row_limit=-1, data_cell_id=None, originator=None +): with DBSession() as session: verify_query_engine_permission(engine_id, session=session) + row_limit_enabled = admin_logic.get_engine_feature_param( + engine_id, "row_limit", False, session=session + ) + if row_limit_enabled and row_limit >= 0: + query = transform_to_limited_query(query, row_limit) + uid = current_user.id query_execution = logic.create_query_execution( query=query, engine_id=engine_id, uid=uid, session=session diff --git a/querybook/server/logic/admin.py b/querybook/server/logic/admin.py index c570041db..b08be57d4 100644 --- a/querybook/server/logic/admin.py +++ b/querybook/server/logic/admin.py @@ -187,6 +187,18 @@ def get_admin_announcements(session=None): ) +@with_session +def get_engine_feature_param( + engine_id, feature_param_name, default_value=None, session=None +): + query_engine = get_query_engine_by_id(engine_id, session=session) + return ( + query_engine.get_feature_params().get(feature_param_name, default_value) + if query_engine + else default_value + ) + + """ --------------------------------------------------------------------------------------------------------- QUERY METASTORE ? diff --git a/querybook/server/tasks/run_datadoc.py b/querybook/server/tasks/run_datadoc.py index 0f109759a..abf0821cb 100644 --- a/querybook/server/tasks/run_datadoc.py +++ b/querybook/server/tasks/run_datadoc.py @@ -11,10 +11,12 @@ from lib.logger import get_logger from lib.query_analysis.templating import render_templated_query +from lib.query_analysis.transform import transform_to_limited_query from lib.scheduled_datadoc.export import export_datadoc from lib.scheduled_datadoc.legacy import convert_if_legacy_datadoc_schedule from lib.scheduled_datadoc.notification import notifiy_on_datadoc_complete +from logic import admin as admin_logic from logic import datadoc as datadoc_logic from logic import query_execution as qe_logic from logic.schedule import ( @@ -73,6 +75,7 @@ def run_datadoc_with_config( # Prepping chain jobs each unit is a [make_qe_task, run_query_task] combo for index, query_cell in enumerate(query_cells): engine_id = query_cell.meta["engine"] + limit = query_cell.meta.get("limit", -1) raw_query = query_cell.context # Skip empty cells @@ -86,6 +89,14 @@ def run_datadoc_with_config( engine_id, session=session, ) + + # If meta["limit"] is set and > 0, apply limit to the query + row_limit_enabled = admin_logic.get_engine_feature_param( + engine_id, "row_limit", False, session=session + ) + if row_limit_enabled and limit >= 0: + query = transform_to_limited_query(query, limit) + except Exception as e: on_datadoc_completion( is_success=False, diff --git a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx index 7946816c6..cc073880b 100644 --- a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx +++ b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx @@ -477,6 +477,7 @@ class DataDocQueryCellComponent extends React.PureComponent { await this.props.createQueryExecution( query, engineId, + this.rowLimit, this.props.cellId ) ).id; @@ -548,6 +549,7 @@ class DataDocQueryCellComponent extends React.PureComponent { return this.props.createQueryExecution( renderedQuery, this.engineId, + this.rowLimit, this.props.cellId ); } @@ -1092,8 +1094,9 @@ function mapDispatchToProps(dispatch: Dispatch) { createQueryExecution: ( query: string, engineId: number, + rowLimit: number, cellId: number - ) => dispatch(createQueryExecution(query, engineId, cellId)), + ) => dispatch(createQueryExecution(query, engineId, rowLimit, cellId)), setTableSidebarId: (id: number) => dispatch(setSidebarTableId(id)), diff --git a/querybook/webapp/components/QueryComposer/QueryComposer.tsx b/querybook/webapp/components/QueryComposer/QueryComposer.tsx index e9c5224f7..3fd4c7335 100644 --- a/querybook/webapp/components/QueryComposer/QueryComposer.tsx +++ b/querybook/webapp/components/QueryComposer/QueryComposer.tsx @@ -42,7 +42,7 @@ import { useTrackView } from 'hooks/useTrackView'; import { trackClick } from 'lib/analytics'; import { createSQLLinter } from 'lib/codemirror/codemirror-lint'; import { replaceStringIndices, searchText } from 'lib/data-doc/search'; -import { getSelectedQuery, IRange, TableToken } from 'lib/sql-helper/sql-lexer'; +import { getSelectedQuery, IRange } from 'lib/sql-helper/sql-lexer'; import { DEFAULT_ROW_LIMIT } from 'lib/sql-helper/sql-limiter'; import { getPossibleTranspilers } from 'lib/templated-query/transpile'; import { enableResizable, getQueryEngineId, sleep } from 'lib/utils'; @@ -524,7 +524,11 @@ const QueryComposer: React.FC = () => { engine.id, async (query, engineId) => { const data = await dispatch( - queryExecutionsAction.createQueryExecution(query, engineId) + queryExecutionsAction.createQueryExecution( + query, + engineId, + rowLimit + ) ); return data.id; } diff --git a/querybook/webapp/components/QueryComposer/RunQuery.tsx b/querybook/webapp/components/QueryComposer/RunQuery.tsx index 5b67daec5..8cf801b82 100644 --- a/querybook/webapp/components/QueryComposer/RunQuery.tsx +++ b/querybook/webapp/components/QueryComposer/RunQuery.tsx @@ -5,10 +5,7 @@ import { ISamplingTables, TDataDocMetaVariables } from 'const/datadoc'; import { IQueryEngine } from 'const/queryEngine'; import { sendConfirm } from 'lib/querybookUI'; import { getDroppedTables } from 'lib/sql-helper/sql-checker'; -import { - getLimitedQuery, - hasQueryContainUnlimitedSelect, -} from 'lib/sql-helper/sql-limiter'; +import { hasQueryContainUnlimitedSelect } from 'lib/sql-helper/sql-limiter'; import { renderTemplatedQuery } from 'lib/templated-query'; import { Nullable } from 'lib/typescript'; import { formatError } from 'lib/utils/error'; @@ -46,13 +43,9 @@ export async function transformQuery( sampleRate ); - const limitedQuery = await transformLimitedQuery( - sampledQuery, - rowLimit, - engine - ); + await checkUnlimitedQuery(sampledQuery, rowLimit, engine); - return limitedQuery; + return sampledQuery; } export async function runQuery( @@ -87,17 +80,16 @@ async function transformTemplatedQuery( } } -async function transformLimitedQuery( +async function checkUnlimitedQuery( query: string, rowLimit: Nullable, engine: IQueryEngine ) { - if (!engine.feature_params?.row_limit) { - return query; - } - - if (rowLimit != null && rowLimit >= 0) { - return getLimitedQuery(query, rowLimit, engine.language); + if ( + !engine.feature_params?.row_limit || + (rowLimit != null && rowLimit >= 0) + ) { + return; } // query is unlimited but engine has row limit feature turned on @@ -108,11 +100,11 @@ async function transformLimitedQuery( ); if (!unlimitedSelectQuery) { - return query; + return; } // Show a warning modal to let user confirm what they are doing - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { sendConfirm({ header: 'Your SELECT query is unbounded', message: ( @@ -135,7 +127,7 @@ async function transformLimitedQuery( ), - onConfirm: () => resolve(query), + onConfirm: () => resolve(), onDismiss: () => reject(), confirmText: 'Run without LIMIT', }); diff --git a/querybook/webapp/redux/dataDoc/action.ts b/querybook/webapp/redux/dataDoc/action.ts index 7886bb1c9..ce08c7a39 100644 --- a/querybook/webapp/redux/dataDoc/action.ts +++ b/querybook/webapp/redux/dataDoc/action.ts @@ -26,6 +26,7 @@ import { } from 'lib/data-doc/datadoc-permission'; import dataDocSocket from 'lib/data-doc/datadoc-socketio'; import { convertRawToContentState } from 'lib/richtext/serialize'; +import { DEFAULT_ROW_LIMIT } from 'lib/sql-helper/sql-limiter'; import { getQueryEngineId } from 'lib/utils'; import { DataDocAccessRequestResource, @@ -308,9 +309,11 @@ export function insertDataDocCell( queryEngineIds ); const engine = meta?.['engine'] ?? defaultQueryEngine; + const limit = meta?.['limit'] ?? DEFAULT_ROW_LIMIT; meta = { ...meta, engine, + limit, }; } diff --git a/querybook/webapp/redux/queryExecutions/action.ts b/querybook/webapp/redux/queryExecutions/action.ts index d1e365504..1fef726f9 100644 --- a/querybook/webapp/redux/queryExecutions/action.ts +++ b/querybook/webapp/redux/queryExecutions/action.ts @@ -378,6 +378,7 @@ export function pollQueryExecution( export function createQueryExecution( query: string, engineId?: number, + rowLimit?: number, cellId?: number ): ThunkResult> { return async (dispatch, getState) => { @@ -387,6 +388,7 @@ export function createQueryExecution( const { data: queryExecution } = await QueryExecutionResource.create( query, selectedEngineId, + rowLimit, cellId ); dispatch(receiveQueryExecution(queryExecution, cellId)); diff --git a/querybook/webapp/resource/queryExecution.ts b/querybook/webapp/resource/queryExecution.ts index e595f077b..27b1adf0d 100644 --- a/querybook/webapp/resource/queryExecution.ts +++ b/querybook/webapp/resource/queryExecution.ts @@ -66,10 +66,16 @@ export const QueryExecutionResource = { environment_id: environmentId, }), - create: (query: string, engineId: number, cellId?: number) => { + create: ( + query: string, + engineId: number, + rowLimit?: number, + cellId?: number + ) => { const params = { query, engine_id: engineId, + row_limit: rowLimit, }; if (cellId != null) { From 1164dde3c2e323d6dabeb0c33e738eb3480539cd Mon Sep 17 00:00:00 2001 From: Dave Bauman Date: Thu, 20 Jun 2024 12:50:18 -0400 Subject: [PATCH 2/4] fix: PR comments --- .../server/datasources/query_execution.py | 11 +- .../server/datasources/query_transform.py | 19 +++ .../server/lib/query_analysis/transform.py | 20 ++- .../test_query_analysis/test_transform.py | 45 ++++++ .../lib/sql-helper/sql-limiter.test.ts | 128 +----------------- .../DataDocQueryCell/DataDocQueryCell.tsx | 5 +- .../QueryComposer/QueryComposer.tsx | 6 +- .../components/QueryComposer/RunQuery.tsx | 34 ++--- querybook/webapp/const/queryTransform.ts | 4 + .../webapp/lib/sql-helper/sql-limiter.ts | 56 -------- .../webapp/redux/queryExecutions/action.ts | 2 - querybook/webapp/resource/queryExecution.ts | 8 +- querybook/webapp/resource/queryTransform.ts | 13 ++ 13 files changed, 119 insertions(+), 232 deletions(-) create mode 100644 querybook/webapp/const/queryTransform.ts diff --git a/querybook/server/datasources/query_execution.py b/querybook/server/datasources/query_execution.py index 42b30d792..8c3515cdb 100644 --- a/querybook/server/datasources/query_execution.py +++ b/querybook/server/datasources/query_execution.py @@ -13,7 +13,6 @@ ) from clients.common import FileDoesNotExist from lib.export.all_exporters import ALL_EXPORTERS, get_exporter -from lib.query_analysis.transform import transform_to_limited_query from lib.result_store import GenericReader from lib.query_analysis.templating import ( QueryTemplatingError, @@ -62,18 +61,10 @@ @register("/query_execution/", methods=["POST"]) -def create_query_execution( - query, engine_id, row_limit=-1, data_cell_id=None, originator=None -): +def create_query_execution(query, engine_id, data_cell_id=None, originator=None): with DBSession() as session: verify_query_engine_permission(engine_id, session=session) - row_limit_enabled = admin_logic.get_engine_feature_param( - engine_id, "row_limit", False, session=session - ) - if row_limit_enabled and row_limit >= 0: - query = transform_to_limited_query(query, row_limit) - uid = current_user.id query_execution = logic.create_query_execution( query=query, engine_id=engine_id, uid=uid, session=session diff --git a/querybook/server/datasources/query_transform.py b/querybook/server/datasources/query_transform.py index df2f0b44a..c0260fabe 100644 --- a/querybook/server/datasources/query_transform.py +++ b/querybook/server/datasources/query_transform.py @@ -1,9 +1,28 @@ from app.datasource import register from lib.query_analysis.transform import ( + has_query_contains_unlimited_select, + transform_to_limited_query, transform_to_sampled_query, ) +@register("/query/transform/limited/", methods=["POST"]) +def query_limited( + query: str, + row_limit: int, + language: str, +): + limited_query = transform_to_limited_query( + query=query, limit=row_limit, language=language + ) + + unlimited_select = has_query_contains_unlimited_select( + query=limited_query, language=language + ) + + return {"query": limited_query, "unlimited_select": unlimited_select} + + @register("/query/transform/sampling/", methods=["POST"]) def query_sampling( query: str, diff --git a/querybook/server/lib/query_analysis/transform.py b/querybook/server/lib/query_analysis/transform.py index 4299a6fa5..cc6147439 100644 --- a/querybook/server/lib/query_analysis/transform.py +++ b/querybook/server/lib/query_analysis/transform.py @@ -67,15 +67,23 @@ def get_limited_select_statement(statement_ast: exp.Expression, limit: int): return statement_ast.limit(limit) -def has_query_contains_unlimited_select(query: str, language: str) -> bool: +def has_query_contains_unlimited_select(query: str, language: str = None): """Check if a query contains a select statement without a limit. Args: query: The query to check Returns: - bool: True if the query contains a select statement without a limit, False otherwise + str: The first select statement without a limit. None if all select statements have a limit. """ - statements = parse(query, dialect=_get_sqlglot_dialect[language]) - return any(get_select_statement_limit(s) == -1 for s in statements) + dialect = _get_sqlglot_dialect(language) + statements = parse(query, dialect) + return next( + ( + s.sql(dialect=dialect, pretty=True) + for s in statements + if get_select_statement_limit(s) == -1 + ), + None, + ) def transform_to_limited_query( @@ -83,8 +91,10 @@ def transform_to_limited_query( ) -> str: """Apply a limit to all select statements in a query if they don't already have a limit. It returns a new query with the limit applied and the original query is not modified. + + If limit is None or negative, the query is returned as-is. """ - if not limit: + if not limit or limit < 0: return query try: diff --git a/querybook/tests/test_lib/test_query_analysis/test_transform.py b/querybook/tests/test_lib/test_query_analysis/test_transform.py index de6c8bfd1..06a647131 100644 --- a/querybook/tests/test_lib/test_query_analysis/test_transform.py +++ b/querybook/tests/test_lib/test_query_analysis/test_transform.py @@ -3,6 +3,7 @@ from lib.query_analysis.transform import ( format_query, get_select_statement_limit, + has_query_contains_unlimited_select, transform_to_limited_query, transform_to_sampled_query, ) @@ -46,6 +47,50 @@ def test_select_with_limit(self): self.assertEqual(get_select_statement_limit(query), expected) +class HasQueryContainsUnlimitedSelectTestCase(TestCase): + def test_select_limit(self): + tests = [ + "SELECT 1 LIMIT 10", + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10", + "TRUNCATE TABLE table_1; SELECT * FROM table_1 WHERE field = 1 LIMIT 1000", + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10; SELECT * FROM table_2 WHERE field = 1 LIMIT 1000", + ] + for query in tests: + with self.subTest(query=query): + self.assertIsNone(has_query_contains_unlimited_select(query)) + + def test_select_no_limit(self): + tests = [ + ("SELECT 1", "SELECT\n 1"), + ( + "SELECT * FROM table_1 WHERE field = 1", + "SELECT\n *\nFROM table_1\nWHERE\n field = 1", + ), + ("SELECT 1; SELECT 2", "SELECT\n 1"), + ( + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10; SELECT * FROM table_1 WHERE field = 1", + "SELECT\n *\nFROM table_1\nWHERE\n field = 1", + ), + ] + for query, expected in tests: + with self.subTest(query=query): + self.assertEquals(has_query_contains_unlimited_select(query), expected) + + def test_not_select_statements(self): + tests = [ + "DELETE FROM table_1 WHERE field = 1", + "CREATE DATABASE IF NOT EXISTS db_1", + "CREATE TABLE table_1 (field1 INT)", + "TRUNCATE TABLE table_1", + "DROP TABLE IF EXISTS db.table1; CREATE TABLE db.table1", + "INSERT INTO table_1 (field1) VALUES (1)", + "UPDATE table_1 SET field1 = 1 WHERE field = 1", + ] + for query in tests: + with self.subTest(query=query): + self.assertIsNone(has_query_contains_unlimited_select(query)) + + class GetLimitedQueryTestCase(TestCase): def test_limit_is_not_specified(self): tests = [ diff --git a/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts b/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts index 54d4349e2..0da7de6fd 100644 --- a/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts +++ b/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts @@ -1,7 +1,4 @@ -import { - getLimitedQuery, - getSelectStatementLimit, -} from 'lib/sql-helper/sql-limiter'; +import { getSelectStatementLimit } from 'lib/sql-helper/sql-limiter'; describe('getSelectStatementLimit', () => { describe('when it is not a SELECT statement', () => { @@ -76,126 +73,3 @@ describe('getSelectStatementLimit', () => { }); }); }); - -describe('getLimitedQuery', () => { - describe('not limited', () => { - test('when rowLimit is not specified', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1;'; - expect(getLimitedQuery(query)).toBe(query); - }); - test('when rowLimit is not specified for multiple queries', () => { - const query = ` - SELECT * FROM table_1 WHERE field = 1; - SELECT * FROM table_1 WHERE field = 1; - `; - expect(getLimitedQuery(query)).toBe(query); - }); - test('when running a select query with fetch', () => { - const query = - 'SELECT * FROM table_1 ORDER BY id FETCH FIRST 10 ROWS ONLY;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with offset and fetch', () => { - const query = - 'SELECT * FROM table_1 ORDER BY id OFFSET 10 FETCH NEXT 10 ROWS ONLY;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with nested query', () => { - const query = `select * from (select * from table limit 5) as x limit 10`; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with a where clause and a limit', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1 LIMIT 1000;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - }); - describe('limited', () => { - test('when running a select query', () => { - const query = 'SELECT * FROM table_1'; - expect(getLimitedQuery(query, 10)).toBe(`${query} limit 10;`); - }); - test('when running a select query with a where clause and a group by and an order by', () => { - const query = - 'SELECT field, count(*) FROM table_1 WHERE deleted = false GROUP BY field ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with trailing semicolon', () => { - const query = 'SELECT * FROM table_1;'; - expect(getLimitedQuery(query, 10)).toBe( - 'SELECT * FROM table_1 limit 10;' - ); - }); - test('when running a select query with comments', () => { - const query = 'SELECT * FROM table_1 -- limit here'; - expect(getLimitedQuery(query, 10)).toBe( - 'SELECT * FROM table_1 limit 10;' - ); - }); - test('when running a select query with non-keyword limits', () => { - const query = `SELECT id, account, 'limit' FROM querybook2.limit ORDER by 'limit' ASC`; - expect(getLimitedQuery(query, 10)).toBe(`${query} limit 10;`); - }); - test('when running a multiple select queries', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2;`; - expect(getLimitedQuery(query, 10)).toBe( - `SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 limit 10;` - ); - }); - test('when running a select query with a where clause', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with a where clause and an order by', () => { - const query = - 'SELECT * FROM table_1 WHERE field = 1 ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with a where clause and a group by and an order by', () => { - const query = - 'SELECT field, count(*) FROM table_1 WHERE deleted = false GROUP BY field ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running two select queries with mixed limits', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2 LIMIT 300;`; - expect(getLimitedQuery(query, 10)) - .toBe(`SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 LIMIT 300;`); - }); - test('when running multiple select queries with mixed limits', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2 LIMIT 300; -SELECT field, count(1) FROM table3 GROUP BY field`; - expect(getLimitedQuery(query, 10)) - .toBe(`SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 LIMIT 300; -SELECT field, count(1) FROM table3 GROUP BY field limit 10;`); - }); - test('when running a select query with nested query', () => { - const query = `select * from (select * from table limit 5) as x`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with wrapped where', () => { - const query = `select * from table where (field = 1 and field2 = 2)`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested queries', () => { - const query = `select * from (select * from table limit 5) as x outer join (select * from table2 limit 5) as y on x.id = y.id`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested queries', () => { - const query = `select * from (select * from table limit 5) as x outer join (select * from table2 limit 5) as y on x.id = y.id`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two union queries', () => { - const query = `select id, name from table_a union all select id, name from table_b where (deleted = false and active = true)`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested union queries', () => { - const query = `(select id, name from table_a limit 10) union all (select id, name from table_b where (deleted = false and active = true))`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - }); -}); diff --git a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx index cc073880b..7946816c6 100644 --- a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx +++ b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx @@ -477,7 +477,6 @@ class DataDocQueryCellComponent extends React.PureComponent { await this.props.createQueryExecution( query, engineId, - this.rowLimit, this.props.cellId ) ).id; @@ -549,7 +548,6 @@ class DataDocQueryCellComponent extends React.PureComponent { return this.props.createQueryExecution( renderedQuery, this.engineId, - this.rowLimit, this.props.cellId ); } @@ -1094,9 +1092,8 @@ function mapDispatchToProps(dispatch: Dispatch) { createQueryExecution: ( query: string, engineId: number, - rowLimit: number, cellId: number - ) => dispatch(createQueryExecution(query, engineId, rowLimit, cellId)), + ) => dispatch(createQueryExecution(query, engineId, cellId)), setTableSidebarId: (id: number) => dispatch(setSidebarTableId(id)), diff --git a/querybook/webapp/components/QueryComposer/QueryComposer.tsx b/querybook/webapp/components/QueryComposer/QueryComposer.tsx index 3fd4c7335..1223802c1 100644 --- a/querybook/webapp/components/QueryComposer/QueryComposer.tsx +++ b/querybook/webapp/components/QueryComposer/QueryComposer.tsx @@ -524,11 +524,7 @@ const QueryComposer: React.FC = () => { engine.id, async (query, engineId) => { const data = await dispatch( - queryExecutionsAction.createQueryExecution( - query, - engineId, - rowLimit - ) + queryExecutionsAction.createQueryExecution(query, engineId) ); return data.id; } diff --git a/querybook/webapp/components/QueryComposer/RunQuery.tsx b/querybook/webapp/components/QueryComposer/RunQuery.tsx index 8cf801b82..c8c503f47 100644 --- a/querybook/webapp/components/QueryComposer/RunQuery.tsx +++ b/querybook/webapp/components/QueryComposer/RunQuery.tsx @@ -5,7 +5,6 @@ import { ISamplingTables, TDataDocMetaVariables } from 'const/datadoc'; import { IQueryEngine } from 'const/queryEngine'; import { sendConfirm } from 'lib/querybookUI'; import { getDroppedTables } from 'lib/sql-helper/sql-checker'; -import { hasQueryContainUnlimitedSelect } from 'lib/sql-helper/sql-limiter'; import { renderTemplatedQuery } from 'lib/templated-query'; import { Nullable } from 'lib/typescript'; import { formatError } from 'lib/utils/error'; @@ -43,9 +42,13 @@ export async function transformQuery( sampleRate ); - await checkUnlimitedQuery(sampledQuery, rowLimit, engine); + const limitedQuery = await transformLimitedQuery( + sampledQuery, + rowLimit, + engine + ); - return sampledQuery; + return limitedQuery; } export async function runQuery( @@ -80,31 +83,30 @@ async function transformTemplatedQuery( } } -async function checkUnlimitedQuery( +async function transformLimitedQuery( query: string, rowLimit: Nullable, engine: IQueryEngine ) { - if ( - !engine.feature_params?.row_limit || - (rowLimit != null && rowLimit >= 0) - ) { - return; + if (!engine.feature_params?.row_limit) { + return query; } - // query is unlimited but engine has row limit feature turned on - - const unlimitedSelectQuery = hasQueryContainUnlimitedSelect( + const { data } = await QueryTransformResource.getLimitedQuery( query, - engine.language + engine.language, + rowLimit ?? -1 ); + const { query: limitedQuery, unlimited_select: unlimitedSelectQuery } = + data; + if (!unlimitedSelectQuery) { - return; + return limitedQuery; } // Show a warning modal to let user confirm what they are doing - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { sendConfirm({ header: 'Your SELECT query is unbounded', message: ( @@ -127,7 +129,7 @@ async function checkUnlimitedQuery( ), - onConfirm: () => resolve(), + onConfirm: () => resolve(limitedQuery), onDismiss: () => reject(), confirmText: 'Run without LIMIT', }); diff --git a/querybook/webapp/const/queryTransform.ts b/querybook/webapp/const/queryTransform.ts new file mode 100644 index 000000000..4fee6aebf --- /dev/null +++ b/querybook/webapp/const/queryTransform.ts @@ -0,0 +1,4 @@ +export interface IQueryTransformLimited { + query: string; + unlimited_select: null | string; +} diff --git a/querybook/webapp/lib/sql-helper/sql-limiter.ts b/querybook/webapp/lib/sql-helper/sql-limiter.ts index 34a87df43..816329921 100644 --- a/querybook/webapp/lib/sql-helper/sql-limiter.ts +++ b/querybook/webapp/lib/sql-helper/sql-limiter.ts @@ -1,5 +1,4 @@ import { - getStatementsFromQuery, getStatementType, IToken, simpleParse, @@ -67,61 +66,6 @@ export function getSelectStatementLimit( return -1; } -/** - * Check if the query has any statements that is SELECT and does not have LIMIT - * If so, return the unlimited select statement, else, return null - * - * @param query - * @param language - */ -export function hasQueryContainUnlimitedSelect( - query: string, - language?: string -): string { - const statements = getStatementsFromQuery(query, language); - - return statements.find( - (statement) => getSelectStatementLimit(statement, language) === -1 - ); -} - -/** - * Automatically apply a limit to a query that does not already have a limit. - * - * @param {string} query - Query to be executed. - * @param {number} rowLimit - Number of rows to limit the query to. - * @param {string} language - Language of the query. - * @return {string} - Query with limit applied (if necessary). - */ -export function getLimitedQuery( - query: string, - rowLimit?: number, - language?: string -): string { - if (rowLimit == null) { - return query; - } - - const statements = getStatementsFromQuery(query, language); - - let addedLimit = false; - const updatedQuery = statements - .map((statement) => { - const existingLimit = getSelectStatementLimit(statement, language); - if (existingLimit == null || existingLimit >= 0) { - return statement + ';'; - } - - addedLimit = true; - return `${statement} limit ${rowLimit};`; - }) - .join('\n'); - - // If no limit was added, return the original query - // to avoid changing whitespace, etc. - return addedLimit ? updatedQuery : query; -} - // 10^1 to 10^5 export const ROW_LIMIT_SCALE = window.ROW_LIMIT_SCALE ?? [1, 2, 3, 4, 5].map((v) => Math.pow(10, v)); diff --git a/querybook/webapp/redux/queryExecutions/action.ts b/querybook/webapp/redux/queryExecutions/action.ts index 1fef726f9..d1e365504 100644 --- a/querybook/webapp/redux/queryExecutions/action.ts +++ b/querybook/webapp/redux/queryExecutions/action.ts @@ -378,7 +378,6 @@ export function pollQueryExecution( export function createQueryExecution( query: string, engineId?: number, - rowLimit?: number, cellId?: number ): ThunkResult> { return async (dispatch, getState) => { @@ -388,7 +387,6 @@ export function createQueryExecution( const { data: queryExecution } = await QueryExecutionResource.create( query, selectedEngineId, - rowLimit, cellId ); dispatch(receiveQueryExecution(queryExecution, cellId)); diff --git a/querybook/webapp/resource/queryExecution.ts b/querybook/webapp/resource/queryExecution.ts index 27b1adf0d..e595f077b 100644 --- a/querybook/webapp/resource/queryExecution.ts +++ b/querybook/webapp/resource/queryExecution.ts @@ -66,16 +66,10 @@ export const QueryExecutionResource = { environment_id: environmentId, }), - create: ( - query: string, - engineId: number, - rowLimit?: number, - cellId?: number - ) => { + create: (query: string, engineId: number, cellId?: number) => { const params = { query, engine_id: engineId, - row_limit: rowLimit, }; if (cellId != null) { diff --git a/querybook/webapp/resource/queryTransform.ts b/querybook/webapp/resource/queryTransform.ts index 064f66ab8..47a3e5fc9 100644 --- a/querybook/webapp/resource/queryTransform.ts +++ b/querybook/webapp/resource/queryTransform.ts @@ -1,6 +1,19 @@ +import { IQueryTransformLimited } from 'const/queryTransform'; import ds from 'lib/datasource'; export const QueryTransformResource = { + getLimitedQuery: (query: string, language: string, row_limit: number) => + ds.save( + '/query/transform/limited/', + { + query, + language, + row_limit, + }, + { + notifyOnError: false, + } + ), getSampledQuery: ( query: string, language: string, From fcac7c9b250ebd838ff2a4937033eb14177aa456 Mon Sep 17 00:00:00 2001 From: Dave Bauman Date: Thu, 20 Jun 2024 14:21:31 -0400 Subject: [PATCH 3/4] fix: Unified transform endpoint for sampling/limiting --- .../server/datasources/query_transform.py | 22 ++++ .../server/lib/query_analysis/transform.py | 3 + .../components/QueryComposer/RunQuery.tsx | 114 ++++++------------ querybook/webapp/resource/queryTransform.ts | 21 ++++ 4 files changed, 83 insertions(+), 77 deletions(-) diff --git a/querybook/server/datasources/query_transform.py b/querybook/server/datasources/query_transform.py index c0260fabe..4045023c7 100644 --- a/querybook/server/datasources/query_transform.py +++ b/querybook/server/datasources/query_transform.py @@ -32,3 +32,25 @@ def query_sampling( return transform_to_sampled_query( query=query, language=language, sampling_tables=sampling_tables ) + + +@register("/query/transform/", methods=["POST"]) +def query_transform( + query: str, + language: str, + row_limit: int, + sampling_tables: dict[str, dict[str, str]], +): + sampled_query = transform_to_sampled_query( + query=query, language=language, sampling_tables=sampling_tables + ) + + limited_query = transform_to_limited_query( + query=sampled_query, limit=row_limit, language=language + ) + + unlimited_select = has_query_contains_unlimited_select( + query=limited_query, language=language + ) + + return {"query": limited_query, "unlimited_select": unlimited_select} diff --git a/querybook/server/lib/query_analysis/transform.py b/querybook/server/lib/query_analysis/transform.py index cc6147439..3e87de9d6 100644 --- a/querybook/server/lib/query_analysis/transform.py +++ b/querybook/server/lib/query_analysis/transform.py @@ -163,6 +163,9 @@ def transform_to_sampled_query( Returns: str: The sampled query """ + if not sampling_tables: + return query + try: dialect = _get_sqlglot_dialect(language) statements = parse(query, dialect=dialect) diff --git a/querybook/webapp/components/QueryComposer/RunQuery.tsx b/querybook/webapp/components/QueryComposer/RunQuery.tsx index c8c503f47..170486562 100644 --- a/querybook/webapp/components/QueryComposer/RunQuery.tsx +++ b/querybook/webapp/components/QueryComposer/RunQuery.tsx @@ -35,77 +35,23 @@ export async function transformQuery( return ''; } - const sampledQuery = await transformTableSamplingQuery( - templatizedQuery, - language, - samplingTables, - sampleRate - ); - - const limitedQuery = await transformLimitedQuery( - sampledQuery, - rowLimit, - engine - ); - - return limitedQuery; -} - -export async function runQuery( - query: string, - engineId: number, - createQueryExecution: (query: string, engineId: number) => Promise -): Promise { - if (!query) { - return null; - } - const runQuery = () => createQueryExecution(query, engineId); - return confirmIfDroppingTablesThenRunQuery(runQuery, query); -} + const rowLimitEnabled = engine.feature_params?.row_limit ?? false; -async function transformTemplatedQuery( - query: string, - templatedVariables: TDataDocMetaVariables, - engineId: number -) { - try { - return await renderTemplatedQuery(query, templatedVariables, engineId); - } catch (e) { - toast.error( -
-

Failed to templatize query.

-

{formatError(e)}

-
, - { - duration: 5000, - } - ); - } -} - -async function transformLimitedQuery( - query: string, - rowLimit: Nullable, - engine: IQueryEngine -) { - if (!engine.feature_params?.row_limit) { - return query; - } - - const { data } = await QueryTransformResource.getLimitedQuery( + const { data } = await QueryTransformResource.getTransformedQuery( query, - engine.language, - rowLimit ?? -1 + language, + rowLimitEnabled ? rowLimit ?? -1 : -1, + sampleRate === 0 ? null : samplingTables ); - const { query: limitedQuery, unlimited_select: unlimitedSelectQuery } = + const { query: transformedQuery, unlimited_select: unlimitedSelectQuery } = data; - if (!unlimitedSelectQuery) { - return limitedQuery; + if (!unlimitedSelectQuery || !rowLimitEnabled) { + return transformedQuery; } - // Show a warning modal to let user confirm what they are doing + // Show a warning modal if the query is unbounded to let user confirm what they are doing return new Promise((resolve, reject) => { sendConfirm({ header: 'Your SELECT query is unbounded', @@ -129,29 +75,43 @@ async function transformLimitedQuery( ), - onConfirm: () => resolve(limitedQuery), + onConfirm: () => resolve(transformedQuery), onDismiss: () => reject(), confirmText: 'Run without LIMIT', }); }); } -async function transformTableSamplingQuery( +export async function runQuery( query: string, - language: string, - tables: Record, - sampleRate: Nullable -) { - if (sampleRate == null || sampleRate <= 0) { - return query; + engineId: number, + createQueryExecution: (query: string, engineId: number) => Promise +): Promise { + if (!query) { + return null; } + const runQuery = () => createQueryExecution(query, engineId); + return confirmIfDroppingTablesThenRunQuery(runQuery, query); +} - const { data } = await QueryTransformResource.getSampledQuery( - query, - language, - tables - ); - return data; +async function transformTemplatedQuery( + query: string, + templatedVariables: TDataDocMetaVariables, + engineId: number +) { + try { + return await renderTemplatedQuery(query, templatedVariables, engineId); + } catch (e) { + toast.error( +
+

Failed to templatize query.

+

{formatError(e)}

+
, + { + duration: 5000, + } + ); + } } async function confirmIfDroppingTablesThenRunQuery( diff --git a/querybook/webapp/resource/queryTransform.ts b/querybook/webapp/resource/queryTransform.ts index 47a3e5fc9..77c8b7bc6 100644 --- a/querybook/webapp/resource/queryTransform.ts +++ b/querybook/webapp/resource/queryTransform.ts @@ -33,4 +33,25 @@ export const QueryTransformResource = { notifyOnError: false, } ), + getTransformedQuery: ( + query: string, + language: string, + row_limit: number, + sampling_tables: Record< + string, + { sampled_table?: string; sample_rate?: number } + > + ) => + ds.save( + '/query/transform/', + { + query, + language, + row_limit, + sampling_tables, + }, + { + notifyOnError: false, + } + ), }; From 844ae1eab1198bdbd4fd81ece1929dfb2c1b934e Mon Sep 17 00:00:00 2001 From: Dave Bauman Date: Fri, 21 Jun 2024 22:42:41 -0400 Subject: [PATCH 4/4] fix: Fix templatizedQuery --- querybook/webapp/components/QueryComposer/RunQuery.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/querybook/webapp/components/QueryComposer/RunQuery.tsx b/querybook/webapp/components/QueryComposer/RunQuery.tsx index 170486562..7e759e2f7 100644 --- a/querybook/webapp/components/QueryComposer/RunQuery.tsx +++ b/querybook/webapp/components/QueryComposer/RunQuery.tsx @@ -38,7 +38,7 @@ export async function transformQuery( const rowLimitEnabled = engine.feature_params?.row_limit ?? false; const { data } = await QueryTransformResource.getTransformedQuery( - query, + templatizedQuery, language, rowLimitEnabled ? rowLimit ?? -1 : -1, sampleRate === 0 ? null : samplingTables