From 6602841c4641ae4debf995a651e490c4515b523d Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Fri, 8 Sep 2023 19:05:24 -0700 Subject: [PATCH] Exposing optional replaceMissingValueWith in lookup function and macros --- docs/querying/lookups.md | 6 +++ .../query/expression/LookupExprMacro.java | 25 +++++++++- .../query/expression/LookupExprMacroTest.java | 6 ++- .../QueryLookupOperatorConversion.java | 30 +++++++++++- .../druid/sql/calcite/CalciteQueryTest.java | 46 +++++++++++++++++++ 5 files changed, 108 insertions(+), 5 deletions(-) diff --git a/docs/querying/lookups.md b/docs/querying/lookups.md index 4a592c158b5f4..a98b3c6a25e43 100644 --- a/docs/querying/lookups.md +++ b/docs/querying/lookups.md @@ -62,6 +62,12 @@ SELECT FROM sales GROUP BY 1 ``` +The `LOOKUP` function also accepts an optional third argument, `replaceMissingValueWith`, which must be a constant string. If the lookup has no value for the queried key, the function returns `replaceMissingValueWith` instead. +For example: +``` +LOOKUP(store, 'store_to_country', 'NA') +``` +If the `store_to_country` lookup has no value for a given `store` key, the expression returns `NA`. 
They can also be queried using the [JOIN operator](datasource.md#join): diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index f824038586c67..0a3037a0a6ccf 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -54,10 +54,11 @@ public String name() @Override public Expr apply(final List args) { - validationHelperCheckArgumentCount(args, 2); + validationHelperCheckMinArgumentCount(args, 2); final Expr arg = args.get(0); final Expr lookupExpr = args.get(1); + final String replaceMissingValueWith = getReplaceMissingValueWith(args); validationHelperCheckArgIsLiteral(lookupExpr, "second argument"); if (lookupExpr.getLiteralValue() == null) { @@ -69,7 +70,7 @@ public Expr apply(final List args) lookupExtractorFactoryContainerProvider, lookupName, false, - null, + replaceMissingValueWith, false, null ); @@ -104,6 +105,15 @@ public ExpressionType getOutputType(InputBindingInspector inspector) @Override public String stringify() { + if (replaceMissingValueWith != null) { + return StringUtils.format( + "%s(%s, %s, '%s')", + FN_NAME, + arg.stringify(), + lookupExpr.stringify(), + replaceMissingValueWith + ); + } return StringUtils.format("%s(%s, %s)", FN_NAME, arg.stringify(), lookupExpr.stringify()); } @@ -116,4 +126,15 @@ public void decorateCacheKeyBuilder(CacheKeyBuilder builder) return new LookupExpr(arg); } + + private String getReplaceMissingValueWith(final List args) + { + if (args.size() > 2) { + final Expr missingValExpr = args.get(2); + if (missingValExpr.isLiteral()) { + return missingValExpr.getLiteralValue().toString(); + } + } + return null; + } } diff --git a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java 
b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java index 65a3e68dcfead..1644d57948dd9 100644 --- a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java +++ b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java @@ -47,7 +47,11 @@ public void testLookup() { assertExpr("lookup(x, 'lookyloo')", "xfoo"); } - + @Test + public void testLookupMissingValue() + { + assertExpr("lookup(y, 'lookyloo', 'N/A')", "N/A"); + } @Test public void testLookupNotFound() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java index 18c58691d28e3..bf0d94611eb78 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java @@ -30,16 +30,27 @@ import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; import org.apache.druid.query.lookup.RegisteredLookupExtractionFn; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.BasicOperandTypeChecker; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.OperatorConversions; import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; import org.apache.druid.sql.calcite.planner.PlannerContext; +import java.util.List; + public class QueryLookupOperatorConversion implements SqlOperatorConversion { private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("LOOKUP") - .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) + .operandTypeChecker( + BasicOperandTypeChecker.builder() + .operandTypes( + SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER + ) + 
.requiredOperandCount(2) + .build()) .returnTypeNullable(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .build(); @@ -73,6 +84,7 @@ public DruidExpression toDruidExpression( inputExpressions -> { final DruidExpression arg = inputExpressions.get(0); final Expr lookupNameExpr = plannerContext.parseExpression(inputExpressions.get(1).getExpression()); + final String replaceMissingValueWith = getReplaceMissingValueWith(inputExpressions, plannerContext); if (arg.isSimpleExtraction() && lookupNameExpr.isLiteral()) { return arg.getSimpleExtraction().cascade( @@ -80,7 +92,7 @@ public DruidExpression toDruidExpression( lookupExtractorFactoryContainerProvider, (String) lookupNameExpr.getLiteralValue(), false, - null, + replaceMissingValueWith, null, true ) @@ -91,4 +103,18 @@ public DruidExpression toDruidExpression( } ); } + + private String getReplaceMissingValueWith( + final List inputExpressions, + final PlannerContext plannerContext + ) + { + if (inputExpressions.size() > 2) { + final Expr missingValExpr = plannerContext.parseExpression(inputExpressions.get(2).getExpression()); + if (missingValExpr.isLiteral()) { + return missingValExpr.getLiteralValue().toString(); + } + } + return null; + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 582408003719a..b3c502d0c0a78 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -8786,6 +8786,52 @@ public void testFilterAndGroupByLookup() ) ); } + @Test + public void testLookupReplaceMissingValueWith() + { + // Cannot vectorize due to extraction dimension specs. 
+ cannotVectorize(); + + final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn( + null, + "lookyloo", + false, + "Missing_Value", + null, + true + ); + + testQuery( + "SELECT LOOKUP(dim1, 'lookyloo', 'Missing_Value'), COUNT(*) FROM foo group by 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + ColumnType.STRING, + extractionFn + ) + ) + ) + .setAggregatorSpecs( + aggregators( + new CountAggregatorFactory("a0") + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"Missing_Value", 5L}, + new Object[]{"xabc", 1L} + ) + ); + } @Test public void testCountDistinctOfLookup()