From 87556c02b0e669aa5f94edca34024394a2645778 Mon Sep 17 00:00:00 2001 From: Harsha Vamsi Kalluri Date: Tue, 17 Oct 2023 09:54:44 -0700 Subject: [PATCH] Adds various Query overrides to Keyword Field The keywordfield mapper provides access to various query types, e.g. the termsQuery, fuzzyQuery. These are inherited as is from the StringType. But we do not take into account the fact that keyword fields can have doc_values enabled. This PR adds the ability for various queries to first check if doc_values are enabled and if so out-source the work to lucene to decide if it's better to use index values or doc_values when running queries. Signed-off-by: Harsha Vamsi Kalluri --- CHANGELOG.md | 3 +- .../test/search/340_keyword_doc_values.yml | 46 ++++ .../index/mapper/KeywordFieldMapper.java | 238 +++++++++++++++++- .../index/mapper/MappedFieldType.java | 24 ++ .../subphase/highlight/CustomQueryScorer.java | 3 + .../index/mapper/KeywordFieldTypeTests.java | 144 ++++++++++- .../query/MultiMatchQueryBuilderTests.java | 8 +- 7 files changed, 447 insertions(+), 19 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c4563a216974..c928e8461e372 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Return 409 Conflict HTTP status instead of 503 on failure to concurrently execute snapshots ([#8986](https://github.com/opensearch-project/OpenSearch/pull/5855)) - Add task completion count in search backpressure stats API ([#10028](https://github.com/opensearch-project/OpenSearch/pull/10028/)) - Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558)) +- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) ### Deprecated @@ -131,4 +132,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml new file mode 100644 index 0000000000000..8829e7b100fdd --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml @@ -0,0 +1,46 @@ +--- +"search on keyword fields with doc_values enabled": + - do: + indices.create: + index: test + body: + mappings: + properties: + "some_keyword": + type: "keyword" + index: true + doc_values: true + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data" }' + - '{ "index": { "_index": "test", "_id": "2" }}' + - '{ "some_keyword": "400" }' + - '{ "index": { "_index": "test", "_id": "3" } }' + - '{ "some_keyword": "5" }' + + - do: + search: + index: test + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + index: test + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total.value: 2 } diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 92ee8067ee4a0..c14b2c92c89c3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -38,11 +38,24 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Operations; +import org.opensearch.OpenSearchException; import org.opensearch.common.Nullable; +import org.opensearch.common.lucene.BytesRefs; import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.search.AutomatonQueries; +import org.opensearch.common.unit.Fuzziness; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; @@ -62,6 +75,8 @@ import java.util.Objects; import java.util.function.Supplier; +import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; + /** * A field mapper for keywords. This mapper accepts strings and indexes them as-is. * @@ -317,7 +332,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S @Override public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { if (format != null) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't " + "support formats."); } return new SourceValueFetcher(name(), context, nullValue) { @@ -372,17 +387,226 @@ protected BytesRef indexedValueForSearch(Object value) { return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); } + @Override + public Query termsQuery(List values, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); + // has index and doc_values enabled + if (isSearchable() && hasDocValues()) { + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + bytesRefs[i] = indexedValueForSearch(values.get(i)); + } + Query indexQuery = new TermInSetQuery(name(), bytesRefs); + Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + // if we only have doc_values enabled, we construct a new query with doc_values re-written + if (hasDocValues()) { + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + bytesRefs[i] = indexedValueForSearch(values.get(i)); + } + return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs); + } + // has index enabled, we're going to return the query as is + return super.termsQuery(values, context); + } + + @Override + public Query prefixQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitive, + QueryShardContext context + ) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[prefix] queries cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false. For optimised prefix queries on text " + + "fields please enable [index_prefixes]." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context); + Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + if (caseInsensitive) { + return AutomatonQueries.caseInsensitivePrefixQuery( + (new Term(name(), indexedValueForSearch(value))), + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return new PrefixQuery(new Term(name(), indexedValueForSearch(value)), MultiTermQuery.DOC_VALUES_REWRITE); + } + return super.prefixQuery(value, method, caseInsensitive, context); + } + + @Override + public Query regexpQuery( + String value, + int syntaxFlags, + int matchFlags, + int maxDeterminizedStates, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[regexp] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + Query dvQuery = super.regexpQuery( + value, + syntaxFlags, + matchFlags, + maxDeterminizedStates, + MultiTermQuery.DOC_VALUES_REWRITE, + context + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new RegexpQuery( + new Term(name(), indexedValueForSearch(value)), + syntaxFlags, + matchFlags, + RegexpQuery.DEFAULT_PROVIDER, + maxDeterminizedStates, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + } + + @Override + public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[range] queries on [text] or [keyword] fields cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + Query dvQuery = new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper, + MultiTermQuery.DOC_VALUES_REWRITE + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + } + + @Override + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + failIfNotIndexedAndNoDocValues(); + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context); + Query dvQuery = super.fuzzyQuery( + value, + fuzziness, + prefixLength, + maxExpansions, + transpositions, + MultiTermQuery.DOC_VALUES_REWRITE, + context + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new FuzzyQuery( + new Term(name(), indexedValueForSearch(value)), + fuzziness.asDistance(BytesRefs.toString(value)), + prefixLength, + maxExpansions, + transpositions, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context); + } + @Override public Query wildcardQuery( String value, @Nullable MultiTermQuery.RewriteMethod method, - boolean caseInsensitve, + boolean caseInsensitive, QueryShardContext context ) { - // keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[wildcard] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + failIfNotIndexedAndNoDocValues(); + // keyword field types are always normalized, so ignore case sensitivity and force normalize the + // wildcard // query text - return super.wildcardQuery(value, method, caseInsensitve, true, context); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context); + Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + Term term; + value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer()); + term = new Term(name(), value); + if (caseInsensitive) { + return AutomatonQueries.caseInsensitiveWildcardQuery(term, method); + } + return new WildcardQuery(term, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, MultiTermQuery.DOC_VALUES_REWRITE); + } + return super.wildcardQuery(value, method, caseInsensitive, true, context); } + } private final boolean indexed; @@ -422,8 +646,10 @@ protected KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; } - /** Values that have more chars than the return value of this method will - * be skipped at parsing time. */ + /** + * Values that have more chars than the return value of this method will + * be skipped at parsing time. + */ public int ignoreAbove() { return ignoreAbove; } diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index 997835f712038..62acad99074c2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -269,6 +269,21 @@ public Query fuzzyQuery( ); } + // Fuzzy Query with re-write method + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + throw new IllegalArgumentException( + "Can only use fuzzy queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]" + ); + } + // Case sensitive form of prefix query public final Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { return prefixQuery(value, method, false, context); @@ -433,6 +448,15 @@ protected final void failIfNotIndexed() { } } + protected final void failIfNotIndexedAndNoDocValues() { + // we fail if a field is both not indexed and does not have doc_values enabled + if (isIndexed == false && hasDocValues() == false) { + throw new IllegalArgumentException( + "Cannot search on field [" + name() + "] since it is both not indexed," + " and does not have doc_values enabled." + ); + } + } + public boolean eagerGlobalOrdinals() { return eagerGlobalOrdinals; } diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/CustomQueryScorer.java b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/CustomQueryScorer.java index d0fb0f6da53c4..89c77b3cd403f 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/CustomQueryScorer.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/CustomQueryScorer.java @@ -33,6 +33,7 @@ package org.opensearch.search.fetch.subphase.highlight; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.WeightedSpanTerm; @@ -104,6 +105,8 @@ protected void extract(Query query, float boost, Map t super.extract(((FunctionScoreQuery) query).getSubQuery(), boost, terms); } else if (query instanceof OpenSearchToParentBlockJoinQuery) { super.extract(((OpenSearchToParentBlockJoinQuery) query).getChildQuery(), boost, terms); + } else if (query instanceof IndexOrDocValuesQuery) { + super.extract(((IndexOrDocValuesQuery) query).getIndexQuery(), boost, terms); } else { super.extract(query, boost, terms); } diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index 0d8ef6784a28c..393c448330142 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -44,13 +44,17 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Operations; import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -128,14 +132,29 @@ public void testTermsQuery() { List terms = new ArrayList<>(); terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); - assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); + Query expected = new IndexOrDocValuesQuery( + new TermInSetQuery("field", terms), + new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms) + ); + assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + Query expectedIndex = new TermInSetQuery("field", terms); + assertEquals(expectedIndex, onlyIndexed.termsQuery(Arrays.asList("foo", "bar"), null)); + + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query expectedDocValues = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms); + assertEquals(expectedDocValues, onlyDocValues.termsQuery(Arrays.asList("foo", "bar"), null)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); } public void testExistsQuery() { @@ -157,9 +176,36 @@ public void testExistsQuery() { public void testRangeQuery() { MappedFieldType ft = new KeywordFieldType("field"); + + Query indexExpected = new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false); + Query dvExpected = new TermRangeQuery( + "field", + BytesRefs.toBytesRef("foo"), + BytesRefs.toBytesRef("bar"), + true, + false, + MultiTermQuery.DOC_VALUES_REWRITE + ); + + Query expected = new IndexOrDocValuesQuery(indexExpected, dvExpected); + Query actual = ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC); + assertEquals(expected, actual); + + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC)); + + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) + ); + assertEquals( - new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), - ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() ); OpenSearchException ee = expectThrows( @@ -175,16 +221,37 @@ public void testRangeQuery() { public void testRegexpQuery() { MappedFieldType ft = new KeywordFieldType("field"); assertEquals( - new RegexpQuery(new Term("field", "foo.*")), + new IndexOrDocValuesQuery( + new RegexpQuery(new Term("field", "foo.*")), + new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE) + ), ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) ); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query indexExpected = new RegexpQuery(new Term("field", "foo.*")); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query dvExpected = new RegexpQuery( + new Term("field", "foo.*"), + 0, + 0, + RegexpQuery.DEFAULT_PROVIDER, + 10, + MultiTermQuery.DOC_VALUES_REWRITE + ); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); OpenSearchException ee = expectThrows( OpenSearchException.class, @@ -200,12 +267,26 @@ public void testFuzzyQuery() { ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC)); + + Query dvExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals( + dvExpected, + onlyDocValues.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC) + ); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) + () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC) + ); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); OpenSearchException ee = expectThrows( OpenSearchException.class, @@ -214,6 +295,47 @@ public void testFuzzyQuery() { assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); } + public void testWildCardQuery() { + MappedFieldType ft = new KeywordFieldType("field"); + Query expected = new IndexOrDocValuesQuery( + new WildcardQuery(new Term("field", new BytesRef("foo*"))), + new WildcardQuery( + new Term("field", new BytesRef("foo*")), + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, + MultiTermQuery.DOC_VALUES_REWRITE + ) + ); + assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*"))); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query dvExpected = new WildcardQuery( + new Term("field", new BytesRef("foo*")), + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, + MultiTermQuery.DOC_VALUES_REWRITE + ); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.wildcardQuery("foo*", MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) + ); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); + + OpenSearchException ee = expectThrows( + OpenSearchException.class, + () -> ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_DISALLOW_EXPENSIVE) + ); + assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + public void testNormalizeQueries() { MappedFieldType ft = new KeywordFieldType("field"); assertEquals(new TermQuery(new Term("field", new BytesRef("FOO"))), ft.termQuery("FOO", null)); diff --git a/server/src/test/java/org/opensearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/MultiMatchQueryBuilderTests.java index e1391393f44fa..39f5bb313fe9e 100644 --- a/server/src/test/java/org/opensearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/MultiMatchQueryBuilderTests.java @@ -304,10 +304,16 @@ public void testToQueryBooleanPrefixMultipleFields() throws IOException { } else if (disjunct instanceof PrefixQuery) { final PrefixQuery secondDisjunct = (PrefixQuery) disjunct; assertThat(secondDisjunct.getPrefix(), equalTo(new Term(KEYWORD_FIELD_NAME, "foo bar"))); + } else if (disjunct instanceof IndexOrDocValuesQuery) { + final IndexOrDocValuesQuery iodvqDisjunct = (IndexOrDocValuesQuery) disjunct; + assertThat(iodvqDisjunct.getIndexQuery().toString(), equalTo("mapped_string_2:foo bar*")); } else { throw new AssertionError(); } - assertThat(disjunct, either(instanceOf(BooleanQuery.class)).or(instanceOf(PrefixQuery.class))); + assertThat( + disjunct, + either(instanceOf(BooleanQuery.class)).or(instanceOf(PrefixQuery.class)).or(instanceOf(IndexOrDocValuesQuery.class)) + ); } } }