diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index c6496d0158cc..88eea1b027bc 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -188,7 +188,8 @@ private static void validateGroupByClause(PinotQuery pinotQuery) aggregateExprCount++; } else if (hasGroupByClause && expressionOutsideGroupByList(selectExpression, groupByExprs)) { throw new SqlCompilationException( - "'" + RequestUtils.prettyPrint(selectExpression) + "' should appear in GROUP BY clause."); + "'" + RequestUtils.prettyPrint(selectExpression) + "' should be functionally dependent on the columns " + + "used in GROUP BY clause."); } } diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java index 2a51829f7ebf..ef64deb5434f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java @@ -27,12 +27,11 @@ import org.apache.pinot.common.request.PinotQuery; import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.sql.parsers.CalciteSqlParser; -import org.apache.pinot.sql.parsers.SqlCompilationException; - /** * Rewrite non-aggregation group-by query to distinct query. * The query can be rewritten only if select expression set and group-by expression set are the same. + * If they are not, the original query is returned. * * E.g. * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo @@ -41,7 +40,7 @@ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo * - * Unsupported queries: + * Not rewritten queries: * SELECT col1 FROM foo GROUP BY col1, col2 (not equivalent to SELECT DISTINCT col1 FROM foo) * SELECT col1 + col2 FROM foo GROUP BY col1, col2 (not equivalent to SELECT col1 + col2 FROM foo) */ @@ -76,6 +75,7 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { } } Set groupByExpressions = new HashSet<>(pinotQuery.getGroupByList()); + // If SELECT and GROUP BY set are equal, rewrite the query as DISTINCT if (selectExpressions.equals(groupByExpressions)) { Expression distinct = RequestUtils.getFunctionExpression("distinct", pinotQuery.getSelectList()); // NOTE: Create an ArrayList because we might need to modify the list later @@ -85,9 +85,7 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { pinotQuery.setGroupByList(null); return pinotQuery; } else { - throw new SqlCompilationException(String.format( - "For non-aggregation group-by query, select expression set and group-by expression set should be the same. " - + "Found select: %s, group-by: %s", selectExpressions, groupByExpressions)); + return pinotQuery; } } } diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 7fb1f7ae1021..369dd8b886e5 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -1379,7 +1379,8 @@ public void testQueryValidation() { Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getMessage().contains("'group_city' should appear in GROUP BY clause.")); + Assert.assertTrue(e.getMessage().contains("'group_city' should be functionally dependent on the columns " + + "used in GROUP BY clause.")); } // Valid groupBy non-aggregate function should pass. @@ -1397,7 +1398,8 @@ public void testQueryValidation() { Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getMessage().contains("'secondsSinceEpoch' should appear in GROUP BY clause.")); + Assert.assertTrue(e.getMessage().contains("'secondsSinceEpoch' should be functionally dependent on the columns " + + "used in GROUP BY clause.")); } // Invalid groupBy clause shouldn't contain aggregate expression, like sum(rsvp_count), count(*). @@ -2592,16 +2594,36 @@ public void testNonAggregationGroupByQuery() { 5); } + @Test + public void testNonAggregationGroupByQueryNoRewrites() { + String query = "SELECT col1 FROM foo GROUP BY col1, col2"; + PinotQuery pinotQuery = compileToPinotQuery(query); + Assert.assertEquals(pinotQuery.getSelectListSize(), 1); + Assert.assertEquals( + pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); + Assert.assertEquals( + pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "col1"); + Assert.assertEquals( + pinotQuery.getGroupByList().get(1).getIdentifier().getName(), "col2"); + + query = "SELECT col1+col2 FROM foo GROUP BY col1,col2"; + pinotQuery = compileToPinotQuery(query); + Assert.assertEquals(pinotQuery.getSelectListSize(), 1); + Assert.assertEquals( + pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), + "plus"); + Assert.assertEquals( + pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); + Assert.assertEquals( + pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2"); + } + @Test public void testInvalidNonAggregationGroupBy() { - Assert.assertThrows(SqlCompilationException.class, - () -> compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2")); Assert.assertThrows(SqlCompilationException.class, () -> compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1")); Assert.assertThrows(SqlCompilationException.class, () -> compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1")); - Assert.assertThrows(SqlCompilationException.class, - () -> compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2")); } @Test diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java index a005bc78de67..e6b401664c90 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java @@ -53,13 +53,25 @@ private void testQueryRewrite(String original, String expected) { @Test public void testUnsupportedQueries() { - testUnsupportedQuery("SELECT col1 FROM foo GROUP BY col1, col2"); testUnsupportedQuery("SELECT col1, col2 FROM foo GROUP BY col1"); - testUnsupportedQuery("SELECT col1 + col2 FROM foo GROUP BY col1, col2"); + testUnsupportedQuery("SELECT concat(col1, col2) FROM foo GROUP BY col1"); + testUnsupportedQuery("SELECT col1+col2*5 FROM foo GROUP BY col1"); } private void testUnsupportedQuery(String query) { assertThrows(SqlCompilationException.class, () -> QUERY_REWRITER.rewrite(CalciteSqlParser.compileToPinotQuery(query))); } + + @Test + public void testSkipQueryRewrite() { + testSkipQueryRewrite("SELECT col1 FROM foo GROUP BY col1, col2"); + testSkipQueryRewrite("SELECT col1 + col2 FROM foo GROUP BY col1, col2"); + testSkipQueryRewrite("SELECT col1+col2*5 FROM foo GROUP BY col1, col2"); + } + + private void testSkipQueryRewrite(String query) { + assertEquals(QUERY_REWRITER.rewrite(CalciteSqlParser.compileToPinotQuery(query)), + CalciteSqlParser.compileToPinotQuery(query)); + } } diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java index 63e9f80e79e7..611ece834b62 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java @@ -2783,6 +2783,18 @@ public void testNonAggregationGroupByQuery(boolean useMultiStageQueryEngine) h2Query = "SELECT CAST(ArrTime-DepTime AS FLOAT) FROM mytable GROUP BY CAST(ArrTime-DepTime AS FLOAT)"; testQuery(pinotQuery, h2Query); + pinotQuery = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000"; + h2Query = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime"; + testQuery(pinotQuery, h2Query); + + pinotQuery = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000"; + h2Query = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime"; + testQuery(pinotQuery, h2Query); + + pinotQuery = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime LIMIT 1000000"; + h2Query = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime"; + testQuery(pinotQuery, h2Query); + pinotQuery = "SELECT ArrTime+DepTime AS A FROM mytable GROUP BY A LIMIT 1000000"; h2Query = "SELECT CAST(ArrTime+DepTime AS FLOAT) AS A FROM mytable GROUP BY A"; testQuery(pinotQuery, h2Query);