Skip to content

Commit

Permalink
For non-aggregation group-by query, if select clause is not matching …
Browse files Browse the repository at this point in the history
…to group by clause, return original query (apache#13792)
  • Loading branch information
tarun11Mavani authored Aug 21, 2024
1 parent 35f8082 commit de3ea87
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ private static void validateGroupByClause(PinotQuery pinotQuery)
aggregateExprCount++;
} else if (hasGroupByClause && expressionOutsideGroupByList(selectExpression, groupByExprs)) {
throw new SqlCompilationException(
"'" + RequestUtils.prettyPrint(selectExpression) + "' should appear in GROUP BY clause.");
"'" + RequestUtils.prettyPrint(selectExpression) + "' should be functionally dependent on the columns "
+ "used in GROUP BY clause.");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.apache.pinot.sql.parsers.SqlCompilationException;


/**
* Rewrite non-aggregation group-by query to distinct query.
* The query can be rewritten only if select expression set and group-by expression set are the same.
* If they are not, the original query is returned.
*
* E.g.
* SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
Expand All @@ -41,7 +40,7 @@
* SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
* SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo
*
* Unsupported queries:
* Not rewritten queries:
* SELECT col1 FROM foo GROUP BY col1, col2 (not equivalent to SELECT DISTINCT col1 FROM foo)
* SELECT col1 + col2 FROM foo GROUP BY col1, col2 (not equivalent to SELECT col1 + col2 FROM foo)
*/
Expand Down Expand Up @@ -76,6 +75,7 @@ public PinotQuery rewrite(PinotQuery pinotQuery) {
}
}
Set<Expression> groupByExpressions = new HashSet<>(pinotQuery.getGroupByList());
// If SELECT and GROUP BY set are equal, rewrite the query as DISTINCT
if (selectExpressions.equals(groupByExpressions)) {
Expression distinct = RequestUtils.getFunctionExpression("distinct", pinotQuery.getSelectList());
// NOTE: Create an ArrayList because we might need to modify the list later
Expand All @@ -85,9 +85,7 @@ public PinotQuery rewrite(PinotQuery pinotQuery) {
pinotQuery.setGroupByList(null);
return pinotQuery;
} else {
throw new SqlCompilationException(String.format(
"For non-aggregation group-by query, select expression set and group-by expression set should be the same. "
+ "Found select: %s, group-by: %s", selectExpressions, groupByExpressions));
return pinotQuery;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1379,7 +1379,8 @@ public void testQueryValidation() {
Assert.fail("Query should have failed compilation");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
Assert.assertTrue(e.getMessage().contains("'group_city' should appear in GROUP BY clause."));
Assert.assertTrue(e.getMessage().contains("'group_city' should be functionally dependent on the columns "
+ "used in GROUP BY clause."));
}

// Valid groupBy non-aggregate function should pass.
Expand All @@ -1397,7 +1398,8 @@ public void testQueryValidation() {
Assert.fail("Query should have failed compilation");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
Assert.assertTrue(e.getMessage().contains("'secondsSinceEpoch' should appear in GROUP BY clause."));
Assert.assertTrue(e.getMessage().contains("'secondsSinceEpoch' should be functionally dependent on the columns "
+ "used in GROUP BY clause."));
}

// Invalid groupBy clause shouldn't contain aggregate expression, like sum(rsvp_count), count(*).
Expand Down Expand Up @@ -2592,16 +2594,36 @@ public void testNonAggregationGroupByQuery() {
5);
}

@Test
public void testNonAggregationGroupByQueryNoRewrites() {
String query = "SELECT col1 FROM foo GROUP BY col1, col2";
PinotQuery pinotQuery = compileToPinotQuery(query);
Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
Assert.assertEquals(
pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1");
Assert.assertEquals(
pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "col1");
Assert.assertEquals(
pinotQuery.getGroupByList().get(1).getIdentifier().getName(), "col2");

query = "SELECT col1+col2 FROM foo GROUP BY col1,col2";
pinotQuery = compileToPinotQuery(query);
Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
Assert.assertEquals(
pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(),
"plus");
Assert.assertEquals(
pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
Assert.assertEquals(
pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2");
}

@Test
public void testInvalidNonAggregationGroupBy() {
Assert.assertThrows(SqlCompilationException.class,
() -> compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2"));
Assert.assertThrows(SqlCompilationException.class,
() -> compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1"));
Assert.assertThrows(SqlCompilationException.class,
() -> compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1"));
Assert.assertThrows(SqlCompilationException.class,
() -> compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,25 @@ private void testQueryRewrite(String original, String expected) {

@Test
public void testUnsupportedQueries() {
testUnsupportedQuery("SELECT col1 FROM foo GROUP BY col1, col2");
testUnsupportedQuery("SELECT col1, col2 FROM foo GROUP BY col1");
testUnsupportedQuery("SELECT col1 + col2 FROM foo GROUP BY col1, col2");
testUnsupportedQuery("SELECT concat(col1, col2) FROM foo GROUP BY col1");
testUnsupportedQuery("SELECT col1+col2*5 FROM foo GROUP BY col1");
}

private void testUnsupportedQuery(String query) {
assertThrows(SqlCompilationException.class,
() -> QUERY_REWRITER.rewrite(CalciteSqlParser.compileToPinotQuery(query)));
}

@Test
public void testSkipQueryRewrite() {
testSkipQueryRewrite("SELECT col1 FROM foo GROUP BY col1, col2");
testSkipQueryRewrite("SELECT col1 + col2 FROM foo GROUP BY col1, col2");
testSkipQueryRewrite("SELECT col1+col2*5 FROM foo GROUP BY col1, col2");
}

private void testSkipQueryRewrite(String query) {
assertEquals(QUERY_REWRITER.rewrite(CalciteSqlParser.compileToPinotQuery(query)),
CalciteSqlParser.compileToPinotQuery(query));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2783,6 +2783,18 @@ public void testNonAggregationGroupByQuery(boolean useMultiStageQueryEngine)
h2Query = "SELECT CAST(ArrTime-DepTime AS FLOAT) FROM mytable GROUP BY CAST(ArrTime-DepTime AS FLOAT)";
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
h2Query = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime";
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
h2Query = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime";
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime LIMIT 1000000";
h2Query = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime";
testQuery(pinotQuery, h2Query);

pinotQuery = "SELECT ArrTime+DepTime AS A FROM mytable GROUP BY A LIMIT 1000000";
h2Query = "SELECT CAST(ArrTime+DepTime AS FLOAT) AS A FROM mytable GROUP BY A";
testQuery(pinotQuery, h2Query);
Expand Down

0 comments on commit de3ea87

Please sign in to comment.