From 24af354e2df27e49537441ad781a268c006ace5e Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:45:33 -0400 Subject: [PATCH] [DerivedField] Support DerivedField object type and integration with Mapper (#13720) (#14105) * Support DerivedField object type * ValueFetcher for DerivedField of object type to parse and fetch nested fields. * DerivedFieldResolver to resolve DerivedField type on the fly. * Introduction of new support types - text, float. * Use index analyzer for derived fields set in index settings. * Support of derived fields from query string * Unit tests * Derived field object type properties to also support Object as a value * Index and cluster setting to control derived fields * Fix test * Fixed missing javadoc * Fix the rewrite issue when profile is set as true * Address PR comments * Address PR comments * Use conjuction query with filter on source_indexed_field query and DerivedFieldQuery * support source_indexed_field for non-object type derived fields * rename source_indexed_field to prefilter_field --------- (cherry picked from commit 9da6170fc38653284cfeee0e5c1800d41999ba08) Signed-off-by: Rishabh Maurya Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../common/settings/ClusterSettings.java | 3 +- .../common/settings/IndexScopedSettings.java | 2 +- .../org/opensearch/index/IndexSettings.java | 22 + .../mapper/DefaultDerivedFieldResolver.java | 229 ++++++ .../opensearch/index/mapper/DerivedField.java | 29 +- .../index/mapper/DerivedFieldMapper.java | 180 +++-- .../index/mapper/DerivedFieldResolver.java | 37 + .../mapper/DerivedFieldResolverFactory.java | 58 ++ .../mapper/DerivedFieldSupportedTypes.java | 95 ++- .../index/mapper/DerivedFieldType.java | 304 +++++++- .../mapper/DerivedFieldValueFetcher.java | 18 +- .../mapper/NoOpDerivedFieldResolver.java | 28 + .../index/mapper/ObjectDerivedFieldType.java | 168 +++++ .../index/query/DerivedFieldQuery.java | 34 +- .../index/query/QueryShardContext.java | 50 +- .../VectorGeoPointShapeQueryProcessor.java | 2 +- .../index/search/QueryParserHelper.java | 3 + .../opensearch/script/DerivedFieldScript.java | 4 +- .../org/opensearch/search/SearchService.java | 51 +- .../search/builder/SearchSourceBuilder.java | 6 +- .../subphase/highlight/HighlightPhase.java | 4 +- .../highlight/UnifiedHighlighter.java | 6 +- .../mapper/DerivedFieldMapperQueryTests.java | 662 +++++++++++++++++- .../mapper/DerivedFieldResolverTests.java | 537 ++++++++++++++ .../index/mapper/DerivedFieldTypeTests.java | 62 +- .../index/query/DerivedFieldQueryTests.java | 90 ++- .../index/query/QueryShardContextTests.java | 37 +- .../opensearch/search/SearchServiceTests.java | 87 ++- .../builder/SearchSourceBuilderTests.java | 6 +- .../DerivedFieldFetchAndHighlightTests.java | 242 +++++-- 30 files changed, 2736 insertions(+), 320 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolverFactory.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/NoOpDerivedFieldResolver.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 52adf1825d62b..f157ba8851c8c 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -746,7 +746,8 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS, - RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA + RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA, + SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 980c432774f6e..6fe8dec9c21b1 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -237,7 +237,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { // Settings for concurrent segment search IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING, - + IndexSettings.ALLOW_DERIVED_FIELDS, // validate that built-in similarities don't get redefined Setting.groupSetting("index.similarity.", (s) -> { Map groups = s.getAsGroups(); diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 16f3af42ed495..de18f07cb3c87 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -152,6 +152,14 @@ public static IndexMergePolicy fromString(String text) { true, Property.IndexScope ); + + public static final Setting ALLOW_DERIVED_FIELDS = Setting.boolSetting( + "index.query.derived_field.enabled", + true, + Property.Dynamic, + Property.IndexScope + ); + public static final Setting INDEX_TRANSLOG_SYNC_INTERVAL_SETTING = Setting.timeSetting( "index.translog.sync_interval", TimeValue.timeValueSeconds(5), @@ -763,6 +771,7 @@ public static IndexMergePolicy fromString(String text) { private final boolean assignedOnRemoteNode; private final RemoteStorePathStrategy remoteStorePathStrategy; private final boolean isTranslogMetadataEnabled; + private volatile boolean allowDerivedField; /** * The maximum age of a retention lease before it is considered expired. @@ -856,6 +865,10 @@ private void setDefaultFields(List defaultFields) { this.defaultFields = defaultFields; } + private void setAllowDerivedField(boolean allowDerivedField) { + this.allowDerivedField = allowDerivedField; + } + /** * Returns true if query string parsing should be lenient. The default is false */ @@ -884,6 +897,13 @@ public boolean isDefaultAllowUnmappedFields() { return defaultAllowUnmappedFields; } + /** + * Returns true if queries are allowed to define and use derived fields. The default is true + */ + public boolean isDerivedFieldAllowed() { + return allowDerivedField; + } + /** * Creates a new {@link IndexSettings} instance. The given node settings will be merged with the settings in the metadata * while index level settings will overwrite node settings. @@ -930,6 +950,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti this.queryStringAnalyzeWildcard = QUERY_STRING_ANALYZE_WILDCARD.get(nodeSettings); this.queryStringAllowLeadingWildcard = QUERY_STRING_ALLOW_LEADING_WILDCARD.get(nodeSettings); this.defaultAllowUnmappedFields = scopedSettings.get(ALLOW_UNMAPPED); + this.allowDerivedField = scopedSettings.get(ALLOW_DERIVED_FIELDS); this.durability = scopedSettings.get(INDEX_TRANSLOG_DURABILITY_SETTING); defaultFields = scopedSettings.get(DEFAULT_FIELD_SETTING); syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings); @@ -1104,6 +1125,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING, this::setDocIdFuzzySetFalsePositiveProbability ); + scopedSettings.addSettingsUpdateConsumer(ALLOW_DERIVED_FIELDS, this::setAllowDerivedField); } private void setSearchIdleAfter(TimeValue searchIdleAfter) { diff --git a/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java new file mode 100644 index 0000000000000..c577a4117247b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java @@ -0,0 +1,229 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import static org.opensearch.index.mapper.FieldMapper.IGNORE_MALFORMED_SETTING; + +/** + * Accepts definition of DerivedField from search request in both forms: map parsed from SearchRequest and {@link DerivedField} defined using client. + * The object is initialized per search request and is responsible to resolve {@link DerivedFieldType} given a field name. + * It uses {@link FieldTypeInference} to infer field type for a nested field within DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type. + */ +public class DefaultDerivedFieldResolver implements DerivedFieldResolver { + private final QueryShardContext queryShardContext; + private final Map derivedFieldTypeMap = new ConcurrentHashMap<>(); + private final FieldTypeInference typeInference; + private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class); + + DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields + ) { + this( + queryShardContext, + derivedFieldsObject, + derivedFields, + new FieldTypeInference( + queryShardContext.index().getName(), + queryShardContext.getMapperService(), + queryShardContext.getIndexReader() + ) + ); + } + + DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields, + FieldTypeInference typeInference + ) { + this.queryShardContext = queryShardContext; + initDerivedFieldTypes(derivedFieldsObject, derivedFields); + this.typeInference = typeInference; + } + + @Override + public Set resolvePattern(String pattern) { + Set derivedFields = new HashSet<>(); + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) { + if (Regex.simpleMatch(pattern, fieldType.name()) && fieldType instanceof DerivedFieldType) { + derivedFields.add(fieldType.name()); + } + } + } + for (String fieldName : derivedFieldTypeMap.keySet()) { + if (Regex.simpleMatch(pattern, fieldName)) { + derivedFields.add(fieldName); + } + } + return derivedFields; + } + + /** + * Resolves the fieldName. The search request definitions are given precedence over derived fields definitions in the index mapping. + * It caches the response for previously resolved field names + * @param fieldName name of the field. It also accepts nested derived field + * @return DerivedFieldType if resolved successfully, a null otherwise. + */ + @Override + public DerivedFieldType resolve(String fieldName) { + return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName)); + } + + private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) { + return Optional.ofNullable(derivedFieldTypeMap.get(fieldName)) + .orElseGet( + () -> Optional.ofNullable((DerivedFieldType) getParentDerivedField(fieldName)) + .map( + // compute and cache nested derived field + parentDerivedField -> derivedFieldTypeMap.computeIfAbsent( + fieldName, + f -> this.resolveNestedField(f, parentDerivedField) + ) + ) + .orElse(null) + ); + } + + private DerivedFieldType resolveNestedField(String fieldName, DerivedFieldType parentDerivedField) { + Objects.requireNonNull(parentDerivedField); + try { + Script script = parentDerivedField.derivedField.getScript(); + String nestedType = explicitTypeFromParent(parentDerivedField.derivedField, fieldName.substring(fieldName.indexOf(".") + 1)); + if (nestedType == null) { + Mapper inferredFieldMapper = typeInference.infer( + getValueFetcher(fieldName, script, parentDerivedField.derivedField.getIgnoreMalformed()) + ); + if (inferredFieldMapper != null) { + nestedType = inferredFieldMapper.typeName(); + } + } + if (nestedType != null) { + DerivedField derivedField = new DerivedField(fieldName, nestedType, script); + if (parentDerivedField.derivedField.getProperties() != null) { + derivedField.setProperties(parentDerivedField.derivedField.getProperties()); + } + if (parentDerivedField.derivedField.getPrefilterField() != null) { + derivedField.setPrefilterField(parentDerivedField.derivedField.getPrefilterField()); + } + if (parentDerivedField.derivedField.getFormat() != null) { + derivedField.setFormat(parentDerivedField.derivedField.getFormat()); + } + if (parentDerivedField.derivedField.getIgnoreMalformed()) { + derivedField.setIgnoreMalformed(parentDerivedField.derivedField.getIgnoreMalformed()); + } + return getDerivedFieldType(derivedField); + } else { + logger.warn( + "Field type cannot be inferred. Ensure the field {} is not rare across entire index or provide explicit mapping using [properties] under parent object [{}] ", + fieldName, + parentDerivedField.derivedField.getName() + ); + } + } catch (IOException e) { + logger.warn(e.getMessage()); + } + return null; + } + + private MappedFieldType getParentDerivedField(String fieldName) { + if (fieldName.contains(".")) { + return resolve(fieldName.split("\\.")[0]); + } + return null; + } + + private static String explicitTypeFromParent(DerivedField parentDerivedField, String subField) { + if (parentDerivedField == null) { + return null; + } + return parentDerivedField.getNestedFieldType(subField); + } + + ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalformed) { + String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1); + return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher( + subFieldName, + DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()), + o -> o, // raw object returned will be used to infer the type without modifying it + ignoreMalformed + ); + } + + private void initDerivedFieldTypes(Map derivedFieldsObject, List derivedFields) { + if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) { + Map derivedFieldObject = new HashMap<>(); + derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject); + derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject)); + } + if (derivedFields != null) { + for (DerivedField derivedField : derivedFields) { + derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField)); + } + } + } + + private Map getAllDerivedFieldTypeFromObject(Map derivedFieldObject) { + Map derivedFieldTypes = new HashMap<>(); + DocumentMapper documentMapper = queryShardContext.getMapperService() + .documentMapperParser() + .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject); + if (documentMapper != null && documentMapper.mappers() != null) { + for (Mapper mapper : documentMapper.mappers()) { + if (mapper instanceof DerivedFieldMapper) { + DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType(); + derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType); + } + } + } + return derivedFieldTypes; + } + + private DerivedFieldType getDerivedFieldType(DerivedField derivedField) { + Mapper.BuilderContext builderContext = new Mapper.BuilderContext( + queryShardContext.getMapperService().getIndexSettings().getSettings(), + new ContentPath(1) + ); + DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder( + derivedField, + queryShardContext.getMapperService().getIndexAnalyzers(), + null, + IGNORE_MALFORMED_SETTING.getDefault(queryShardContext.getIndexSettings().getSettings()) + ); + return builder.build(builderContext).fieldType(); + } + + private DerivedFieldType resolveUsingMappings(String name) { + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(name); + if (mappedFieldType instanceof DerivedFieldType) { + return (DerivedFieldType) mappedFieldType; + } + } + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java index b502e41cbb97b..249b60a1c4ec5 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java @@ -30,7 +30,7 @@ public class DerivedField implements Writeable, ToXContentFragment { private final String name; private final String type; private final Script script; - private String sourceIndexedField; + private String prefilterField; private Map properties; private Boolean ignoreMalformed; private String format; @@ -49,7 +49,7 @@ public DerivedField(StreamInput in) throws IOException { if (in.readBoolean()) { properties = in.readMap(); } - sourceIndexedField = in.readOptionalString(); + prefilterField = in.readOptionalString(); format = in.readOptionalString(); ignoreMalformed = in.readOptionalBoolean(); } @@ -67,7 +67,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(true); out.writeMap(properties); } - out.writeOptionalString(sourceIndexedField); + out.writeOptionalString(prefilterField); out.writeOptionalString(format); out.writeOptionalBoolean(ignoreMalformed); } @@ -81,8 +81,8 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par if (properties != null) { builder.field("properties", properties); } - if (sourceIndexedField != null) { - builder.field("source_indexed_field", sourceIndexedField); + if (prefilterField != null) { + builder.field("prefilter_field", prefilterField); } if (format != null) { builder.field("format", format); @@ -110,8 +110,15 @@ public Map getProperties() { return properties; } - public String getSourceIndexedField() { - return sourceIndexedField; + public String getNestedFieldType(String fieldName) { + if (properties == null || properties.isEmpty() || fieldName == null || fieldName.isEmpty()) { + return null; + } + return (String) properties.get(fieldName); + } + + public String getPrefilterField() { + return prefilterField; } public String getFormat() { @@ -126,8 +133,8 @@ public void setProperties(Map properties) { this.properties = properties; } - public void setSourceIndexedField(String sourceIndexedField) { - this.sourceIndexedField = sourceIndexedField; + public void setPrefilterField(String prefilterField) { + this.prefilterField = prefilterField; } public void setFormat(String format) { @@ -140,7 +147,7 @@ public void setIgnoreMalformed(boolean ignoreMalformed) { @Override public int hashCode() { - return Objects.hash(name, type, script, sourceIndexedField, properties, ignoreMalformed, format); + return Objects.hash(name, type, script, prefilterField, properties, ignoreMalformed, format); } @Override @@ -155,7 +162,7 @@ public boolean equals(Object obj) { return Objects.equals(name, other.name) && Objects.equals(type, other.type) && Objects.equals(script, other.script) - && Objects.equals(sourceIndexedField, other.sourceIndexedField) + && Objects.equals(prefilterField, other.prefilterField) && Objects.equals(properties, other.properties) && Objects.equals(ignoreMalformed, other.ignoreMalformed) && Objects.equals(format, other.format); diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java index c6ae71320c35c..e08e46e1ea969 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java @@ -9,16 +9,20 @@ package org.opensearch.index.mapper; import org.apache.lucene.index.IndexableField; +import org.opensearch.common.time.DateFormatter; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.script.Script; import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Function; +import static org.opensearch.index.mapper.DateFieldMapper.getDefaultDateTimeFormatter; + /** * A field mapper for derived fields * @@ -28,6 +32,8 @@ public class DerivedFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "derived"; + protected final IndexAnalyzers indexAnalyzers; + private static DerivedFieldMapper toType(FieldMapper in) { return (DerivedFieldMapper) in; } @@ -38,62 +44,180 @@ private static DerivedFieldMapper toType(FieldMapper in) { * @opensearch.internal */ public static class Builder extends ParametrizedFieldMapper.Builder { - // TODO: The type of parameter may change here if the actual underlying FieldType object is needed - private final Parameter type = Parameter.stringParam("type", false, m -> toType(m).type, ""); + private final Parameter type = Parameter.stringParam("type", true, m -> toType(m).type, ""); + private final IndexAnalyzers indexAnalyzers; + private final boolean defaultIgnoreMalformed; + private final DateFormatter defaultDateFormatter; private final Parameter