From f1edd671fb4310bed5b3f5679665d74ccd9964df Mon Sep 17 00:00:00 2001 From: Pranav Date: Mon, 2 Oct 2023 17:09:23 -0700 Subject: [PATCH] Exposing optional replaceMissingValueWith in lookup function and macros (#14956) * Exposing optional replaceMissingValueWith in lookup function and macros * args range validation * Updating docs * Addressing comments * Update docs/querying/sql-scalar.md Co-authored-by: Clint Wylie * Update docs/querying/sql-functions.md Co-authored-by: Clint Wylie * Addressing comments --------- Co-authored-by: Clint Wylie --- docs/querying/lookups.md | 6 ++ docs/querying/math-expr.md | 2 +- docs/querying/sql-functions.md | 2 +- docs/querying/sql-scalar.md | 2 +- .../query/expression/LookupExprMacro.java | 27 +++++- .../query/expression/LookupExprMacroTest.java | 95 +++++++++++++++++++ .../query/expression/LookupExprMacroTest.java | 7 +- .../QueryLookupOperatorConversion.java | 32 ++++++- .../druid/sql/calcite/CalciteQueryTest.java | 59 ++++++++++++ website/.spelling | 1 + 10 files changed, 225 insertions(+), 8 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java diff --git a/docs/querying/lookups.md b/docs/querying/lookups.md index 4a592c158b5f..10aa6690ec23 100644 --- a/docs/querying/lookups.md +++ b/docs/querying/lookups.md @@ -62,6 +62,12 @@ SELECT FROM sales GROUP BY 1 ``` +The lookup function also accepts the 3rd argument called `replaceMissingValueWith` as a constant string. If your value is missing a lookup for the queried key, the lookup function returns the result value from `replaceMissingValueWith` +For example: +``` +LOOKUP(store, 'store_to_country', 'NA') +``` +If value is missing from `store_to_country` lookup for given key 'store' then it will return `NA`. They can also be queried using the [JOIN operator](datasource.md#join): diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md index dcb4b7ce7787..340fa4ee8650 100644 --- a/docs/querying/math-expr.md +++ b/docs/querying/math-expr.md @@ -82,7 +82,7 @@ The following built-in functions are available. |concat|concat(expr, expr...) concatenate a list of strings| |format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).| |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| -|lookup|lookup(expr, lookup-name) looks up expr in a registered [query-time lookup](../querying/lookups.md)| +|lookup|lookup(expr, lookup-name[,replaceMissingValueWith]) looks up expr in a registered,`replaceMissingValueWith` is an optional constant string [query-time lookup](../querying/lookups.md)| |parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.| |regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| |regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. | diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md index 3df24ea607f6..467e79b64384 100644 --- a/docs/querying/sql-functions.md +++ b/docs/querying/sql-functions.md @@ -885,7 +885,7 @@ Calculates the base-10 of the numeric expression. ## LOOKUP -`LOOKUP(, )` +`LOOKUP(, [, ])` **Function type:** [Scalar, string](sql-scalar.md#string-functions) diff --git a/docs/querying/sql-scalar.md b/docs/querying/sql-scalar.md index e60a8fe43967..c9409dd07bfd 100644 --- a/docs/querying/sql-scalar.md +++ b/docs/querying/sql-scalar.md @@ -98,7 +98,7 @@ String functions accept strings, and return a type appropriate to the function. |`CHAR_LENGTH(expr)`|Alias for `LENGTH`.| |`CHARACTER_LENGTH(expr)`|Alias for `LENGTH`.| |`STRLEN(expr)`|Alias for `LENGTH`.| -|`LOOKUP(expr, lookupName)`|Look up `expr` in a registered [query-time lookup table](lookups.md). Note that lookups can also be queried directly using the [`lookup` schema](sql.md#from).| +|`LOOKUP(expr, lookupName, [replaceMissingValueWith])`|Look up `expr` in a registered [query-time lookup table](lookups.md). Note that lookups can also be queried directly using the [`lookup` schema](sql.md#from). Optional constant replaceMissingValueWith can be passed as 3rd argument to be returned when value is missing from lookup.| |`LOWER(expr)`|Returns `expr` in all lowercase.| |`UPPER(expr)`|Returns `expr` in all uppercase.| |`PARSE_LONG(string, [radix])`|Parses a string into a long (BIGINT) with the given radix, or 10 (decimal) if a radix is not provided.| diff --git a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java index f824038586c6..ad9884251a82 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/LookupExprMacro.java @@ -22,6 +22,7 @@ import com.google.inject.Inject; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; @@ -54,10 +55,11 @@ public String name() @Override public Expr apply(final List args) { - validationHelperCheckArgumentCount(args, 2); + validationHelperCheckArgumentRange(args, 2, 3); final Expr arg = args.get(0); final Expr lookupExpr = args.get(1); + final Expr replaceMissingValueWith = getReplaceMissingValueWith(args); validationHelperCheckArgIsLiteral(lookupExpr, "second argument"); if (lookupExpr.getLiteralValue() == null) { @@ -69,7 +71,9 @@ public Expr apply(final List args) lookupExtractorFactoryContainerProvider, lookupName, false, - null, + replaceMissingValueWith != null && replaceMissingValueWith.isLiteral() + ? Evals.asString(replaceMissingValueWith.getLiteralValue()) + : null, false, null ); @@ -104,6 +108,15 @@ public ExpressionType getOutputType(InputBindingInspector inspector) @Override public String stringify() { + if (replaceMissingValueWith != null) { + return StringUtils.format( + "%s(%s, %s, %s)", + FN_NAME, + arg.stringify(), + lookupExpr.stringify(), + replaceMissingValueWith.stringify() + ); + } return StringUtils.format("%s(%s, %s)", FN_NAME, arg.stringify(), lookupExpr.stringify()); } @@ -116,4 +129,14 @@ public void decorateCacheKeyBuilder(CacheKeyBuilder builder) return new LookupExpr(arg); } + + private Expr getReplaceMissingValueWith(final List args) + { + if (args.size() > 2) { + final Expr missingValExpr = args.get(2); + validationHelperCheckArgIsLiteral(missingValExpr, "third argument"); + return missingValExpr; + } + return null; + } } diff --git a/processing/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java new file mode 100644 index 000000000000..4db019210461 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import com.google.common.collect.Lists; +import org.apache.commons.compress.utils.Sets; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.query.lookup.LookupExtractorFactoryContainer; +import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +public class LookupExprMacroTest extends MacroTestBase +{ + public LookupExprMacroTest() + { + super( + new LookupExprMacro(new LookupExtractorFactoryContainerProvider() + { + @Override + public Set getAllLookupNames() + { + return Sets.newHashSet("test_lookup"); + } + + @Override + public Optional get(String lookupName) + { + return Optional.empty(); + } + }) + ); + } + + @Test + public void testTooFewArgs() + { + expectException(IllegalArgumentException.class, "Function[lookup] requires 2 to 3 arguments"); + apply(Collections.emptyList()); + } + + @Test + public void testNonLiteralLookupName() + { + expectException( + IllegalArgumentException.class, + "Function[lookup] second argument must be a registered lookup name" + ); + apply(getArgs(Lists.newArrayList("1", new ArrayList()))); + } + + @Test + public void testValidCalls() + { + Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup")))); + Assert.assertNotNull(apply(getArgs(Lists.newArrayList("null", "test_lookup")))); + Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup", null)))); + Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup", "N/A")))); + } + + private List getArgs(List args) + { + return args.stream().map(a -> { + if (a != null && a instanceof String) { + return ExprEval.of(a.toString()).toExpr(); + } + return ExprEval.bestEffortOf(null).toExpr(); + }).collect(Collectors.toList()); + } +} diff --git a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java index 65a3e68dcfea..96803a309665 100644 --- a/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java +++ b/server/src/test/java/org/apache/druid/query/expression/LookupExprMacroTest.java @@ -47,7 +47,12 @@ public void testLookup() { assertExpr("lookup(x, 'lookyloo')", "xfoo"); } - + @Test + public void testLookupMissingValue() + { + assertExpr("lookup(y, 'lookyloo', 'N/A')", "N/A"); + assertExpr("lookup(y, 'lookyloo', null)", null); + } @Test public void testLookupNotFound() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java index 18c58691d28e..21d4bea356cc 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/QueryLookupOperatorConversion.java @@ -26,20 +26,33 @@ import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; import org.apache.druid.query.lookup.RegisteredLookupExtractionFn; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.BasicOperandTypeChecker; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.OperatorConversions; import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; import org.apache.druid.sql.calcite.planner.PlannerContext; +import java.util.List; + public class QueryLookupOperatorConversion implements SqlOperatorConversion { private static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("LOOKUP") - .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) + .operandTypeChecker( + BasicOperandTypeChecker.builder() + .operandTypes( + SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER + ) + .requiredOperandCount(2) + .literalOperands(2) + .build()) .returnTypeNullable(SqlTypeName.VARCHAR) .functionCategory(SqlFunctionCategory.STRING) .build(); @@ -73,6 +86,7 @@ public DruidExpression toDruidExpression( inputExpressions -> { final DruidExpression arg = inputExpressions.get(0); final Expr lookupNameExpr = plannerContext.parseExpression(inputExpressions.get(1).getExpression()); + final String replaceMissingValueWith = getReplaceMissingValueWith(inputExpressions, plannerContext); if (arg.isSimpleExtraction() && lookupNameExpr.isLiteral()) { return arg.getSimpleExtraction().cascade( @@ -80,7 +94,7 @@ public DruidExpression toDruidExpression( lookupExtractorFactoryContainerProvider, (String) lookupNameExpr.getLiteralValue(), false, - null, + replaceMissingValueWith, null, true ) @@ -91,4 +105,18 @@ public DruidExpression toDruidExpression( } ); } + + private String getReplaceMissingValueWith( + final List inputExpressions, + final PlannerContext plannerContext + ) + { + if (inputExpressions.size() > 2) { + final Expr missingValExpr = plannerContext.parseExpression(inputExpressions.get(2).getExpression()); + if (missingValExpr.isLiteral()) { + return Evals.asString(missingValExpr.getLiteralValue()); + } + } + return null; + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 31107e407c2f..35f85c5fbf2b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -8966,6 +8966,65 @@ public void testFilterAndGroupByLookup() ) ); } + @Test + public void testLookupReplaceMissingValueWith() + { + // Cannot vectorize due to extraction dimension specs. + cannotVectorize(); + + final RegisteredLookupExtractionFn extractionFn1 = new RegisteredLookupExtractionFn( + null, + "lookyloo", + false, + "Missing_Value", + null, + true + ); + final RegisteredLookupExtractionFn extractionFnRMVNull = new RegisteredLookupExtractionFn( + null, + "lookyloo", + false, + null, + null, + true + ); + testQuery( + "SELECT LOOKUP(dim1, 'lookyloo', 'Missing_Value'), LOOKUP(dim1, 'lookyloo', null) as rmvNull, COUNT(*) FROM foo group by 1,2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new ExtractionDimensionSpec( + "dim1", + "d0", + ColumnType.STRING, + extractionFn1 + ), + new ExtractionDimensionSpec( + "dim1", + "d1", + ColumnType.STRING, + extractionFnRMVNull + ) + ) + ) + .setAggregatorSpecs( + aggregators( + new CountAggregatorFactory("a0") + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"Missing_Value", NullHandling.defaultStringValue(), 5L}, + new Object[]{"xabc", "xabc", 1L} + ) + ); + } @Test public void testCountDistinctOfLookup() diff --git a/website/.spelling b/website/.spelling index 10751d69530a..00468cc69763 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1130,6 +1130,7 @@ simpleJson dimensionSpec flattenSpec binaryAsString +replaceMissingValueWith sslFactory sslMode Proto