From 7b1c2c78c16d566da93001d0f6a8224b2de9e8d0 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 2 Jan 2024 16:04:40 -0800 Subject: [PATCH] Implementation for match_only_text field (#11039) * Implementation for match_only_text field Signed-off-by: Rishabh Maurya * Fix build failures Signed-off-by: Rishabh Maurya * Fix bugs Signed-off-by: Rishabh Maurya * Added mapper tests, stil failing on prefix and phrase tests Signed-off-by: Rishabh Maurya * Disable index prefix and phrase mapper Signed-off-by: Rishabh Maurya * Added unit tests for phrase and multiphrase query validation Signed-off-by: Rishabh Maurya * Add unit tests for prefix and prefix phrase queries Signed-off-by: Rishabh Maurya * Add a test to cover 3 word with synonym match phrase prefix query Signed-off-by: Rishabh Maurya * Add unit test for SourceFieldMatchQuery Signed-off-by: Rishabh Maurya * Added test for _source disabled case Signed-off-by: Rishabh Maurya * Add unit test for missing field Signed-off-by: Rishabh Maurya * more validation tests and changelog update Signed-off-by: Rishabh Maurya * Added integration tests for match_only_text replicating text field integ tests Signed-off-by: Rishabh Maurya * Added skip section in integ test to fix mixed cluster failures Signed-off-by: Rishabh Maurya * remove unused import Signed-off-by: Rishabh Maurya * Address PR comments Signed-off-by: Rishabh Maurya * fix integ tests Signed-off-by: Rishabh Maurya * Fix flaky test due to random indexwriter Signed-off-by: Rishabh Maurya * pr comment: header modification Signed-off-by: Rishabh Maurya * Address PR comments Signed-off-by: Rishabh Maurya * addded change to the right section of CHANGELOG Signed-off-by: Rishabh Maurya * overriding the textFieldType before every test Signed-off-by: Rishabh Maurya * rename @Before method Signed-off-by: Rishabh Maurya * update changelog description Signed-off-by: Rishabh Maurya --------- Signed-off-by: Rishabh Maurya --- CHANGELOG.md | 1 + .../11_match_field_match_only_text.yml | 70 +++ .../20_ngram_search_field_match_only_text.yml | 144 ++++++ ...ram_highligthing_field_match_only_text.yml | 137 ++++++ .../40_query_string_field_match_only_text.yml | 59 +++ ...default_analyzer_field_match_only_text.yml | 42 ++ ...es_with_synonyms_field_match_only_text.yml | 348 ++++++++++++++ ...60_synonym_graph_field_match_only_text.yml | 209 ++++++++ .../70_intervals_field_match_only_text.yml | 67 +++ .../20_phrase_field_match_only_text.yml | 238 +++++++++ .../20_highlighting_field_match_only_text.yml | 201 ++++++++ .../20_query_string_field_match_only_text.yml | 53 +++ .../30_sig_terms_field_match_only_text.yml | 76 +++ .../90_sig_text_field_match_only_text.yml | 155 ++++++ .../20_highlighting_field_match_only_text.yml | 137 ++++++ .../160_exists_query_match_only_text.yml | 119 +++++ ...00_phrase_search_field_match_only_text.yml | 67 +++ ...atch_bool_prefix_field_match_only_text.yml | 282 +++++++++++ ...disallow_queries_field_match_only_text.yml | 141 ++++++ .../10_basic_field_match_only_field.yml | 92 ++++ .../index/mapper/MappedFieldType.java | 13 + .../mapper/MatchOnlyTextFieldMapper.java | 312 ++++++++++++ .../index/mapper/TextFieldMapper.java | 35 +- .../index/query/SourceFieldMatchQuery.java | 160 +++++++ .../opensearch/index/search/MatchQuery.java | 10 +- .../index/search/MultiMatchQuery.java | 4 +- .../org/opensearch/indices/IndicesModule.java | 2 + .../MatchOnlyTextFieldAnalyzerModeTests.java | 16 + .../mapper/MatchOnlyTextFieldMapperTests.java | 450 ++++++++++++++++++ .../mapper/MatchOnlyTextFieldTypeTests.java | 31 ++ .../mapper/TextFieldAnalyzerModeTests.java | 22 +- .../index/mapper/TextFieldMapperTests.java | 222 ++++----- .../index/mapper/TextFieldTypeTests.java | 32 +- .../query/SourceFieldMatchQueryTests.java | 173 +++++++ .../index/mapper/MapperServiceTestCase.java | 4 +- .../aggregations/AggregatorTestCase.java | 4 +- 36 files changed, 3959 insertions(+), 169 deletions(-) create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml create mode 100644 modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml create mode 100644 server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java create mode 100644 server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java create mode 100644 server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index a167127a7d795..026606ff57d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170)) - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) +- Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml new file mode 100644 index 0000000000000..40ff2c2f4cdbe --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -0,0 +1,70 @@ +# integration tests for queries with specific analysis chains + +"match query with stacked stems": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + # Tests the match query stemmed tokens are "stacked" on top of the unstemmed + # versions in the same position. + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + index: + tokenizer: standard + filter: [lowercase] + search: + rest_total_hits_as_int: true + tokenizer: standard + filter: [lowercase, keyword_repeat, porter_stem, unique_stem] + filter: + unique_stem: + type: unique + only_on_same_position: true + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: { "text": "the fox runs across the street" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 1} + + - do: + index: + index: test + id: 2 + body: { "text": "run fox run" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 2} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml new file mode 100644 index 0000000000000..95b648dee47c8 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml @@ -0,0 +1,144 @@ +"ngram search": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + analysis: + analyzer: + my_analyzer: + tokenizer: standard + filter: [my_ngram] + filter: + my_ngram: + type: ngram + min: 2, + max: 2 + mappings: + properties: + text: + type: match_only_text + analyzer: my_analyzer + + - do: + index: + index: test + id: 1 + body: { "text": "foo bar baz" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: foa + - match: {hits.total: 1} + +--- +"testNGramCopyField": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + max_ngram_diff: 9 + analysis: + analyzer: + my_ngram_analyzer: + tokenizer: my_ngram_tokenizer + tokenizer: + my_ngram_tokenizer: + type: ngram + min: 1, + max: 10 + token_chars: [] + mappings: + properties: + origin: + type: match_only_text + copy_to: meta + meta: + type: match_only_text + analyzer: my_ngram_analyzer + + - do: + index: + index: test + id: 1 + body: { "origin": "C.A1234.5678" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234.56 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + term: + meta: + value: a1234 + - match: {hits.total: 0} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: a1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml new file mode 100644 index 0000000000000..597f55679a2c6 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -0,0 +1,137 @@ +"ngram highlighting": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + index.max_ngram_diff: 19 + analysis: + tokenizer: + my_ngramt: + type: ngram + min_gram: 1 + max_gram: 20 + token_chars: letter,digit + filter: + my_ngram: + type: ngram + min_gram: 1 + max_gram: 20 + analyzer: + name2_index_analyzer: + tokenizer: whitespace + filter: [my_ngram] + name_index_analyzer: + tokenizer: my_ngramt + name_search_analyzer: + tokenizer: whitespace + mappings: + properties: + name: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name_index_analyzer + search_analyzer: name_search_analyzer + name2: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name2_index_analyzer + search_analyzer: name_search_analyzer + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: logicacmg ehemals avinci - the know how company + name2: logicacmg ehemals avinci - the know how company + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica m + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica ma + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica m + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica ma + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..ddebb1d76acbc --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -0,0 +1,59 @@ +--- +"Test query string with snowball": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + index: + index: test + id: 1 + body: { field: foo bar} + + - do: + indices.refresh: + index: [test] + + - do: + indices.validate_query: + index: test + q: field:bars + analyzer: snowball + + - is_true: valid + + - do: + search: + rest_total_hits_as_int: true + index: test + q: field:bars + analyzer: snowball + + - match: {hits.total: 1} + + - do: + explain: + index: test + id: 1 + q: field:bars + analyzer: snowball + + - is_true: matched + + - do: + count: + index: test + q: field:bars + analyzer: snowball + + - match: {count : 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml new file mode 100644 index 0000000000000..97f3fb65e94a2 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -0,0 +1,42 @@ +--- +"Test default search analyzer is applied": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + index.analysis.analyzer.default.type: simple + index.analysis.analyzer.default_search.type: german + mappings: + properties: + body: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + body: Ich lese die Bücher + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + q: "body:Bücher" + + - match: { hits.total.value: 0 } + + - do: + search: + index: test + q: "body:Bücher" + analyzer: simple + + - match: { hits.total.value: 1 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml new file mode 100644 index 0000000000000..0c263a47a38e6 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml @@ -0,0 +1,348 @@ +--- +"Test common terms query with stacked tokens": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + features: "allowed_warnings" + + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + syns: + tokenizer: standard + filter: [ "syns" ] + mappings: + properties: + field1: + type: match_only_text + analyzer: syns + field2: + type: match_only_text + analyzer: syns + + - do: + index: + index: test + id: 3 + body: + field1: quick lazy huge brown pidgin + field2: the quick lazy huge brown fox jumps over the tree + + - do: + index: + index: test + id: 1 + body: + field1: the quick brown fox + + - do: + index: + index: test + id: 2 + body: + field1: the quick lazy huge brown fox jumps over the tree + refresh: true + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast huge fox + minimum_should_match: + low_freq: 3 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 5 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 6 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the quick brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + minimum_should_match: 3 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + multi_match: + query: the fast brown + fields: [ "field1", "field2" ] + cutoff_frequency: 3 + operator: and + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.2._id: "2" } + +--- +"Test match query with synonyms - see #3881 for extensive description of the issue": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + synonym: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + index: + type: custom + tokenizer: standard + filter: lowercase + search: + rest_total_hits_as_int: true + type: custom + tokenizer: standard + filter: [ lowercase, synonym ] + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: + text: quick brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fast + operator: and + - match: { hits.total: 1 } + + - do: + index: + index: test + id: 2 + body: + text: fast brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 2 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml new file mode 100644 index 0000000000000..91a8b1509517e --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml @@ -0,0 +1,209 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + index: + number_of_shards: 1 # keep scoring stable + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + graph_syns: + type: synonym_graph + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + analyzer: + lower_syns: + type: custom + tokenizer: standard + filter: [ lowercase, syns ] + lower_graph_syns: + type: custom + tokenizer: standard + filter: [ lowercase, graph_syns ] + mappings: + properties: + field: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + text: say wtf happened foo + - do: + index: + index: test + id: 2 + body: + text: bar baz what the fudge man + + - do: + index: + index: test + id: 3 + body: + text: wtf + + - do: + index: + index: test + id: 4 + body: + text: what is the name for fudge + + - do: + index: + index: test + id: 5 + body: + text: bar two three + + - do: + index: + index: test + id: 6 + body: + text: bar baz two three + refresh: true + +--- +"simple multiterm phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_graph_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms + +--- +"simple multiterm and": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_graph_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + +--- +"minimum should match": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + auto_generate_synonyms_phrase_query: false + - match: { hits.total: 6 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + minimum_should_match: 80% + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + +--- +"multiterm synonyms phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: wtf + operator: and + analyzer: lower_graph_syns + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"phrase prefix": + - do: + index: + index: test + id: 7 + body: + text: "WTFD!" + + - do: + index: + index: test + id: 8 + body: + text: "Weird Al's WHAT THE FUDGESICLE" + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase_prefix: + text: + query: wtf + analyzer: lower_graph_syns + - match: { hits.total: 5 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "7" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.3._id: "8" } + - match: { hits.hits.4._id: "2" } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml new file mode 100644 index 0000000000000..9792c9d2695ea --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -0,0 +1,67 @@ +# integration tests for intervals queries using analyzers +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + text_en: + type: match_only_text + analyzer: english + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet and raining cats and dogs", + "text_en" : "Outside it is cold and wet and raining cats and dogs"}' + +--- +"Test use_field": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + use_field: text_en + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml new file mode 100644 index 0000000000000..aff2b3f11101c --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -0,0 +1,238 @@ +# Integration tests for the phrase suggester with a few analyzers + +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + body: + tokenizer: standard + filter: [lowercase] + bigram: + tokenizer: standard + filter: [lowercase, bigram] + ngram: + tokenizer: standard + filter: [lowercase, ngram] + reverse: + tokenizer: standard + filter: [lowercase, reverse] + filter: + bigram: + type: shingle + output_unigrams: false + min_shingle_size: 2 + max_shingle_size: 2 + ngram: + type: shingle + output_unigrams: true + min_shingle_size: 2 + max_shingle_size: 2 + mappings: + properties: + body: + type: match_only_text + analyzer: body + fields: + bigram: + type: match_only_text + analyzer: bigram + ngram: + type: match_only_text + analyzer: ngram + reverse: + type: match_only_text + analyzer: reverse + + - do: + bulk: + index: test + refresh: true + body: | + { "index": {} } + { "body": "Xorr the God-Jewel" } + { "index": {} } + { "body": "Xorn" } + { "index": {} } + { "body": "Arthur, King of the Britons" } + { "index": {} } + { "body": "Sir Lancelot the Brave" } + { "index": {} } + { "body": "Patsy, Arthur's Servant" } + { "index": {} } + { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" } + { "index": {} } + { "body": "Sir Bedevere the Wise" } + { "index": {} } + { "body": "Sir Galahad the Pure" } + { "index": {} } + { "body": "Miss Islington, the Witch" } + { "index": {} } + { "body": "Zoot" } + { "index": {} } + { "body": "Leader of Robin's Minstrels" } + { "index": {} } + { "body": "Old Crone" } + { "index": {} } + { "body": "Frank, the Historian" } + { "index": {} } + { "body": "Frank's Wife" } + { "index": {} } + { "body": "Dr. Piglet" } + { "index": {} } + { "body": "Dr. Winston" } + { "index": {} } + { "body": "Sir Robin (Stand-in)" } + { "index": {} } + { "body": "Knight Who Says Ni" } + { "index": {} } + { "body": "Police sergeant who stops the film" } + +--- +"sorts by score": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.ngram + min_word_length: 1 + suggest_mode: always + + - match: {suggest.test.0.options.0.text: xorr the god jewel} + - match: {suggest.test.0.options.1.text: xorn the god jewel} + +--- +"breaks ties by sorting terms": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + # This runs the suggester without bigrams so we can be sure of the sort order + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body + analyzer: body + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body + min_word_length: 1 + suggest_mode: always + + # The scores are identical but xorn comes first because it sorts first + - match: {suggest.test.0.options.0.text: xorn the god jewel} + - match: {suggest.test.0.options.1.text: xorr the god jewel} + - match: {suggest.test.0.options.0.score: $body.suggest.test.0.options.0.score} + +--- +"fails when asked to run on a field without unigrams": + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + +--- +"doesn't fail when asked to run on a field without unigrams when force_unigrams=false": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + force_unigrams: false + + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + force_unigrams: false + +--- +"reverse suggestions": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: Artur, Ging of the Britons + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.reverse + min_word_length: 1 + suggest_mode: always + pre_filter: reverse + post_filter: reverse + + - match: {suggest.test.0.options.0.text: arthur king of the britons} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..3cb8e09c70aed --- /dev/null +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -0,0 +1,201 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: match_only_text + analyzer: simple + + - do: + index: + index: test + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 2 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..085c5633ac72b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -0,0 +1,53 @@ +--- +"validate_query with query_string parameters": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + indices.validate_query: + index: test + q: bar + df: field + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + default_operator: AND + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:BA* + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: number:foo + lenient: true + + - is_true: valid diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml new file mode 100644 index 0000000000000..7a96536a2e261 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -0,0 +1,76 @@ +--- +"Default index": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: true + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_terms": {"significant_terms": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml new file mode 100644 index 0000000000000..bc41f157dfdc4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml @@ -0,0 +1,155 @@ +--- +"Default index": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + +--- +"Dedup noise": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good noisewords1 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good noisewords2 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad noisewords3 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad noisewords4 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad noisewords5 gb1 gb2 gb3 gb4 gb5 gb6", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad noisewords6 gb1 gb2 gb3 gb4 gb5 gb6", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad noisewords7 b1 b2 b3 b4 b5 b6", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text", "filter_duplicate_text": true}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - length: { aggregations.class.buckets.0.sig_text.buckets: 1 } + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + - length: { aggregations.class.buckets.1.sig_text.buckets: 1 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..7100d620bf19e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -0,0 +1,137 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + _source: + excludes: ["nested.stored_only"] + properties: + nested: + type: nested + properties: + field: + type: text + fields: + vectors: + type: text + term_vector: "with_positions_offsets" + postings: + type: text + index_options: "offsets" + stored: + type: match_only_text + store: true + stored_only: + type: match_only_text + store: true + - do: + index: + index: test + id: 1 + refresh: true + body: + nested: + field : "The quick brown fox is brown." + stored : "The quick brown fox is brown." + stored_only : "The quick brown fox is brown." + +--- +"Unified highlighter": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.field.vectors: {} + nested.field.postings: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.stored", "nested.stored_only" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.stored: {} + nested.stored_only: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields and disabled source": + - do: + indices.create: + index: disabled_source + body: + mappings: + _source: + enabled: false + properties: + nested: + type: nested + properties: + field: + type: match_only_text + stored_only: + type: match_only_text + store: true + - do: + index: + index: disabled_source + id: 1 + refresh: true + body: + nested: + field: "The quick brown fox is brown." + stored_only: "The quick brown fox is brown." + + - do: + search: + index: disabled_source + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: ["nested.field", "nested.stored_only"] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.stored_only: {} + + - is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown."} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml new file mode 100644 index 0000000000000..03626236604a1 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml @@ -0,0 +1,119 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + features: ["headers"] + + - do: + indices.create: + index: test + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 1 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 2 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 3 + routing: "route_me" + body: + match_only_text: "foo bar" + + - do: + index: + index: "test" + id: 4 + body: {} + + - do: + indices.create: + index: test-unmapped + body: + mappings: + dynamic: false + properties: + unrelated: + type: keyword + + - do: + index: + index: "test-unmapped" + id: 1 + body: + unrelated: "foo" + + - do: + indices.create: + index: test-empty + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + indices.refresh: + index: [test, test-unmapped, test-empty] + +--- +"Test exists query on mapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 3} + +--- +"Test exists query on unmapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test-unmapped + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + +--- +"Test exists query on match_only_text field in empty index": + - do: + search: + rest_total_hits_as_int: true + index: test-empty + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml new file mode 100644 index 0000000000000..a41b8d353e3e9 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml @@ -0,0 +1,67 @@ +--- +"search with indexed phrases": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: { text: "peter piper picked a peck of pickled peppers" } + + - do: + indices.refresh: + index: [test] + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: + query: "peter piper" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + q: '"peter piper"~1' + df: text + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "peter piper picked" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "piper" + + - match: {hits.total: 1} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml new file mode 100644 index 0000000000000..fc4e9f9de0f38 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -0,0 +1,282 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: match_only_text + my_field2: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml new file mode 100644 index 0000000000000..f4faf87eb83cc --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml @@ -0,0 +1,141 @@ +--- +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + fields: + raw: + type: keyword + nested1: + type: nested + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold", "nested1": [{"foo": "bar1"}]}' + - '{"index": {"_index": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere", "nested1": [{"foo": "bar2"}]}' + - '{"index": {"_index": "test", "_id": "3"}}' + - '{"text" : "Baby its cold there outside", "nested1": [{"foo": "bar3"}]}' + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet", "nested1": [{"foo": "bar4"}]}' + +--- +teardown: + + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: null + +--- +"Test disallow expensive queries": + + ### Check for initial setting = null -> false + - do: + cluster.get_settings: + flat_settings: true + + - is_false: search.allow_expensive_queries + + ### Update setting to false + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: "false" + flat_settings: true + + - match: {transient: {search.allow_expensive_queries: "false"}} + + ### Prefix + - do: + catch: /\[prefix\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false. For optimised prefix queries on text fields please enable \[index_prefixes\]./ + search: + index: test + body: + query: + prefix: + text: + value: out + + ### Fuzzy + - do: + catch: /\[fuzzy\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + fuzzy: + text: + value: outwide + + ### Regexp + - do: + catch: /\[regexp\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + regexp: + text: + value: .*ou.*id.* + + ### Wildcard + - do: + catch: /\[wildcard\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + wildcard: + text: + value: out?ide + + ### Range on text + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text: + gte: "theres" + + ### Range on keyword + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text.raw: + gte : "Outside it is cold and wet" + + ### Nested + - do: + catch: /\[joining\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + nested: + path: "nested1" + query: + bool: + must: [{"match" : {"nested1.foo" : "bar2"}}] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml new file mode 100644 index 0000000000000..cc15796e4697f --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -0,0 +1,92 @@ +--- +"Search shards aliases with and without filters": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test_index + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + field: + type: match_only_text + aliases: + test_alias_no_filter: {} + test_alias_filter_1: + filter: + term: + field : value1 + test_alias_filter_2: + filter: + term: + field : value2 + + - do: + search_shards: + index: test_alias_no_filter + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - is_true: indices.test_index + - is_false: indices.test_index.filter + - match: { indices.test_index.aliases: [test_alias_no_filter]} + + - do: + search_shards: + index: test_alias_filter_1 + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1] } + - match: { indices.test_index.filter.term.field.value: value1 } + - lte: { indices.test_index.filter.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.term.field.boost: 1.0 } + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_filter_2"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2]} + - length: { indices.test_index.filter.bool.should: 2 } + - lte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - lte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - match: { indices.test_index.filter.bool.adjust_pure_negative: true} + - lte: { indices.test_index.filter.bool.boost: 1.0 } + - gte: { indices.test_index.filter.bool.boost: 1.0 } + + - do: + search_shards: + index: "test*" + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_no_filter]} + - is_false: indices.test_index.filter diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index da62ddfd7017d..66d4654e543a2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -359,18 +359,31 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionInc ); } + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements); + } + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements); + } + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions); + } + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java new file mode 100644 index 0000000000000..fb97f8c309a70 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -0,0 +1,312 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.opensearch.Version; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +/** + * A specialized type of TextFieldMapper which disables the positions and norms to save on storage and executes phrase queries, which requires + * positional data, in a slightly less efficient manner using the {@link org.opensearch.index.query.SourceFieldMatchQuery}. + */ +public class MatchOnlyTextFieldMapper extends TextFieldMapper { + + public static final FieldType FIELD_TYPE = new FieldType(); + public static final String CONTENT_TYPE = "match_only_text"; + private final String indexOptions = FieldMapper.indexOptionToString(FIELD_TYPE.indexOptions()); + private final boolean norms = FIELD_TYPE.omitNorms() == false; + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + static { + FIELD_TYPE.setTokenized(true); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setStoreTermVectors(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + protected MatchOnlyTextFieldMapper( + String simpleName, + FieldType fieldType, + MatchOnlyTextFieldType mappedFieldType, + TextFieldMapper.PrefixFieldMapper prefixFieldMapper, + TextFieldMapper.PhraseFieldMapper phraseFieldMapper, + MultiFields multiFields, + CopyTo copyTo, + Builder builder + ) { + + super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder); + } + + @Override + public ParametrizedFieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName(), this.indexCreatedVersion, this.indexAnalyzers).init(this); + } + + /** + * Builder class for constructing the MatchOnlyTextFieldMapper. + */ + public static class Builder extends TextFieldMapper.Builder { + final Parameter indexOptions = indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions); + + private static Parameter indexOptions(Function initializer) { + return Parameter.restrictedStringParam("index_options", false, initializer, "docs"); + } + + final Parameter norms = norms(m -> ((MatchOnlyTextFieldMapper) m).norms); + final Parameter indexPhrases = Parameter.boolParam( + "index_phrases", + false, + m -> ((MatchOnlyTextFieldType) m.mappedFieldType).indexPhrases, + false + ).setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Index phrases cannot be enabled on for match_only_text field. Use text field instead"); + } + }); + + final Parameter indexPrefixes = new Parameter<>( + "index_prefixes", + false, + () -> null, + TextFieldMapper::parsePrefixConfig, + m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType) + .map(p -> new PrefixConfig(p.minChars, p.maxChars)) + .orElse(null) + ).acceptsNull().setValidator(v -> { + if (v != null) { + throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. Use text field instead"); + } + }); + + private static Parameter norms(Function initializer) { + return Parameter.boolParam("norms", false, initializer, false) + .setMergeValidator((o, n) -> o == n || (o && n == false)) + .setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Norms cannot be enabled on for match_only_text field"); + } + }); + } + + public Builder(String name, IndexAnalyzers indexAnalyzers) { + super(name, indexAnalyzers); + } + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name, indexCreatedVersion, indexAnalyzers); + } + + @Override + public MatchOnlyTextFieldMapper build(BuilderContext context) { + FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); + MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); + return new MatchOnlyTextFieldMapper( + name, + fieldType, + tft, + buildPrefixMapper(context, fieldType, tft), + buildPhraseMapper(fieldType, tft), + multiFieldsBuilder.build(this, context), + copyTo.build(), + this + ); + } + + @Override + protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) > 0) { + throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); + } + if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) < 0) { + throw new IllegalArgumentException( + "Cannot set position_increment_gap on field [" + name + "] without indexing enabled" + ); + } + indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); + searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); + searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); + } + TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer); + MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( + buildFullName(context), + index.getValue(), + fieldType.stored(), + tsi, + meta.getValue() + ); + ft.setIndexAnalyzer(indexAnalyzer); + ft.setEagerGlobalOrdinals(eagerGlobalOrdinals.getValue()); + ft.setBoost(boost.getValue()); + if (fieldData.getValue()) { + ft.setFielddata(true, freqFilter.getValue()); + } + return ft; + } + + @Override + protected List> getParameters() { + return Arrays.asList( + index, + store, + indexOptions, + norms, + termVectors, + analyzers.indexAnalyzer, + analyzers.searchAnalyzer, + analyzers.searchQuoteAnalyzer, + similarity, + positionIncrementGap, + fieldData, + freqFilter, + eagerGlobalOrdinals, + indexPhrases, + indexPrefixes, + boost, + meta + ); + } + } + + /** + * The specific field type for MatchOnlyTextFieldMapper + * + * @opensearch.internal + */ + public static final class MatchOnlyTextFieldType extends TextFieldType { + private final boolean indexPhrases = false; + + private PrefixFieldType prefixFieldType; + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + public MatchOnlyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { + super(name, indexed, stored, tsi, meta); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException { + PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term : phraseQuery.getTerms()) { + builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); + } + return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, context); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term[] terms : multiPhraseQuery.getTermArrays()) { + if (terms.length > 1) { + // Multiple terms in the same position, creating a disjunction query for it and + // adding it to conjunction query + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : terms) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } else { + builder.add(new TermQuery(terms[0]), BooleanClause.Occur.FILTER); + } + } + return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, context); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); + List> termArray = getTermsFromTokenStream(stream); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < termArray.size(); i++) { + if (i == termArray.size() - 1) { + // last element of the term Array is a prefix, thus creating a prefix query for it and adding it to + // conjunction query + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); + mqb.add(termArray.get(i).toArray(new Term[0])); + builder.add(mqb, BooleanClause.Occur.FILTER); + } else { + if (termArray.get(i).size() > 1) { + // multiple terms in the same position, creating a disjunction query for it and + // adding it to conjunction query + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : termArray.get(i)) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } else { + builder.add(new TermQuery(termArray.get(i).get(0)), BooleanClause.Occur.FILTER); + } + } + } + return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); + } + + private List> getTermsFromTokenStream(TokenStream stream) throws IOException { + final List> termArray = new ArrayList<>(); + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + List currentTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + if (currentTerms.isEmpty() == false) { + termArray.add(List.copyOf(currentTerms)); + } + currentTerms.clear(); + } + currentTerms.add(new Term(name(), termAtt.getBytesRef())); + } + termArray.add(List.copyOf(currentTerms)); + return termArray; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 1d0d1ae2bd899..d0e041e68a81d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -110,7 +110,7 @@ public class TextFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "text"; - private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; + protected static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; private static final String FAST_PHRASE_SUFFIX = "._index_phrase"; /** @@ -152,11 +152,11 @@ private static TextFieldMapper toType(FieldMapper in) { * * @opensearch.internal */ - private static final class PrefixConfig implements ToXContent { + protected static final class PrefixConfig implements ToXContent { final int minChars; final int maxChars; - private PrefixConfig(int minChars, int maxChars) { + PrefixConfig(int minChars, int maxChars) { this.minChars = minChars; this.maxChars = maxChars; if (minChars > maxChars) { @@ -198,7 +198,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } } - private static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { + static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { if (propNode == null) { return null; } @@ -214,7 +214,7 @@ private static PrefixConfig parsePrefixConfig(String propName, ParserContext par * * @opensearch.internal */ - private static final class FielddataFrequencyFilter implements ToXContent { + protected static final class FielddataFrequencyFilter implements ToXContent { final double minFreq; final double maxFreq; final int minSegmentSize; @@ -280,15 +280,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder { private final Version indexCreatedVersion; - private final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); - private final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); + protected final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); + protected final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); final Parameter similarity = TextParams.similarity(m -> toType(m).similarity); final Parameter indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions); final Parameter norms = TextParams.norms(true, m -> toType(m).fieldType.omitNorms() == false); final Parameter termVectors = TextParams.termVectors(m -> toType(m).termVectors); - final Parameter positionIncrementGap = Parameter.intParam( "position_increment_gap", false, @@ -332,8 +331,8 @@ public static class Builder extends ParametrizedFieldMapper.Builder { .orElse(null) ).acceptsNull(); - private final Parameter boost = Parameter.boostParam(); - private final Parameter> meta = Parameter.metaParam(); + protected final Parameter boost = Parameter.boostParam(); + protected final Parameter> meta = Parameter.metaParam(); final TextParams.Analyzers analyzers; @@ -395,7 +394,7 @@ protected List> getParameters() { ); } - private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); @@ -420,7 +419,7 @@ private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context return ft; } - private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { + protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { if (indexPrefixes.get() == null) { return null; } @@ -454,7 +453,7 @@ private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fi return new PrefixFieldMapper(pft, prefixFieldType); } - private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { + protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { if (indexPhrases.get() == false) { return null; } @@ -683,7 +682,7 @@ public Query existsQuery(QueryShardContext context) { * * @opensearch.internal */ - private static final class PhraseFieldMapper extends FieldMapper { + protected static final class PhraseFieldMapper extends FieldMapper { PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); @@ -710,7 +709,7 @@ protected String contentType() { * * @opensearch.internal */ - private static final class PrefixFieldMapper extends FieldMapper { + protected static final class PrefixFieldMapper extends FieldMapper { protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); @@ -968,15 +967,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S } - private final FieldType fieldType; + protected final FieldType fieldType; private final PrefixFieldMapper prefixFieldMapper; private final PhraseFieldMapper phraseFieldMapper; private final SimilarityProvider similarity; private final String indexOptions; private final String termVectors; private final int positionIncrementGap; - private final Version indexCreatedVersion; - private final IndexAnalyzers indexAnalyzers; + protected final Version indexCreatedVersion; + protected final IndexAnalyzers indexAnalyzers; private final FielddataFrequencyFilter freqFilter; protected TextFieldMapper( diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java new file mode 100644 index 0000000000000..b0be20e417efe --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.SourceValueFetcher; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * A query that matches against each document from the parent query by filtering using the source field values. + * Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically. + */ +public class SourceFieldMatchQuery extends Query { + private final Query delegateQuery; + private final Query filter; + private final SearchLookup lookup; + private final MappedFieldType fieldType; + private final SourceValueFetcher valueFetcher; + private final QueryShardContext context; + + /** + * Constructs a SourceFieldMatchQuery. + * + * @param delegateQuery The parent query to use to find matches. + * @param filter The query used to filter further by running against field value fetched using _source field. + * @param fieldType The mapped field type. + * @param context The QueryShardContext to get lookup and valueFetcher + */ + public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, QueryShardContext context) { + this.delegateQuery = delegateQuery; + this.filter = filter; + this.fieldType = fieldType; + this.context = context; + this.lookup = context.lookup(); + if (!context.documentMapper("").sourceMapper().enabled()) { + throw new IllegalArgumentException( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: _source is disabled in the mappings " + + "for index [" + + context.index().getName() + + "]" + ); + } + this.valueFetcher = (SourceValueFetcher) fieldType.valueFetcher(context, lookup, null); + } + + @Override + public void visit(QueryVisitor visitor) { + delegateQuery.visit(visitor); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + Query rewritten = indexSearcher.rewrite(delegateQuery); + if (rewritten == delegateQuery) { + return this; + } + return new SourceFieldMatchQuery(rewritten, filter, fieldType, context); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + + Weight weight = delegateQuery.createWeight(searcher, ScoreMode.TOP_DOCS, boost); + + return new ConstantScoreWeight(this, boost) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + + Scorer scorer = weight.scorer(context); + if (scorer == null) { + // none of the docs are matching + return null; + } + DocIdSetIterator approximation = scorer.iterator(); + LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() { + leafSearchLookup.setDocument(approximation.docID()); + List values = valueFetcher.fetchValues(leafSearchLookup.source()); + // Missing fields won't count as match. Can we use a default value for missing field? + if (values.isEmpty()) { + return false; + } + MemoryIndex memoryIndex = new MemoryIndex(); + for (Object value : values) { + memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); + } + float score = memoryIndex.search(filter); + return score > 0.0f; + } + + @Override + public float matchCost() { + // arbitrary cost + return 1000f; + } + }; + return new ConstantScoreScorer(this, score(), ScoreMode.TOP_DOCS, twoPhase); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // It is fine to cache if delegate query weight is cacheable since additional logic here + // is just a filter on top of delegate query matches + return weight.isCacheable(ctx); + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (sameClassAs(o) == false) { + return false; + } + SourceFieldMatchQuery other = (SourceFieldMatchQuery) o; + return Objects.equals(this.delegateQuery, other.delegateQuery) + && Objects.equals(this.filter, other.filter) + && Objects.equals(this.fieldType, other.fieldType) + && Objects.equals(this.context, other.context); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), delegateQuery, filter, fieldType, context); + } + + @Override + public String toString(String f) { + return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + " ], filter query: [ " + filter.toString(f) + "])"; + } +} diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index 9e2b79971369d..ec6755ea25703 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -67,6 +67,7 @@ import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.support.QueryParsers; @@ -701,7 +702,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClaus protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { checkForPositions(field); - return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -714,7 +715,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { checkForPositions(field); - return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -728,7 +729,7 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in if (positionCount > 1) { checkForPositions(field); } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -887,6 +888,9 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in private void checkForPositions(String field) { if (fieldType.getTextSearchInfo().hasPositions() == false) { + if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) { + return; + } throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); } } diff --git a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java index 241f05af2c512..8c0c87e8c9d0c 100644 --- a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java @@ -248,7 +248,7 @@ protected Query newPrefixQuery(Term term) { protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } @@ -261,7 +261,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 5c2137ec742a4..eea5dbbf57f6c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -59,6 +59,7 @@ import org.opensearch.index.mapper.IpFieldMapper; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.MetadataFieldMapper; import org.opensearch.index.mapper.NestedPathFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; @@ -158,6 +159,7 @@ public static Map getMappers(List mappe mappers.put(nanoseconds.type(), DateFieldMapper.NANOS_PARSER); mappers.put(IpFieldMapper.CONTENT_TYPE, IpFieldMapper.PARSER); mappers.put(TextFieldMapper.CONTENT_TYPE, TextFieldMapper.PARSER); + mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); mappers.put(KeywordFieldMapper.CONTENT_TYPE, KeywordFieldMapper.PARSER); mappers.put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser()); mappers.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser()); diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java new file mode 100644 index 0000000000000..13cb279418fa8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +public class MatchOnlyTextFieldAnalyzerModeTests extends TextFieldAnalyzerModeTests { + @Override + ParametrizedFieldMapper.TypeParser getTypeParser() { + return MatchOnlyTextFieldMapper.PARSER; + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java new file mode 100644 index 0000000000000..580f8cccc9af5 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -0,0 +1,450 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.core.common.Strings; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; +import org.opensearch.index.search.MatchQuery; +import org.junit.Before; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.core.Is.is; + +public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { + + @Before + public void setupMatchOnlyTextFieldMapper() { + textFieldName = "match_only_text"; + } + + @Override + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(fieldMapping(this::minimalMapping).toString(), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals("1234", fields[0].stringValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + @Override + public void testEnableStore() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0].fieldType().stored()); + } + + @Override + public void testIndexOptions() throws IOException { + Map supportedOptions = new HashMap<>(); + supportedOptions.put("docs", IndexOptions.DOCS); + + Map unsupportedOptions = new HashMap<>(); + unsupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS); + unsupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + unsupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + + for (String option : supportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(mapping); + String serialized = Strings.toString(MediaTypeRegistry.JSON, mapper); + assertThat(serialized, containsString("\"docs\":{\"type\":\"match_only_text\"}")); + + ParsedDocument doc = mapper.parse(source(b -> { b.field(option, "1234"); })); + + IndexOptions options = supportedOptions.get(option); + IndexableField[] fields = doc.rootDoc().getFields(option); + assertEquals(1, fields.length); + assertEquals(options, fields[0].fieldType().indexOptions()); + } + + for (String option : unsupportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(mapping)); + assertThat( + e.getMessage(), + containsString( + "Failed to parse mapping [_doc]: Unknown value [" + option + "] for field [index_options] - accepted values are [docs]" + ) + ); + } + } + + @Override + public void testAnalyzedFieldPositionIncrementWithoutPositions() { + for (String indexOptions : List.of("docs")) { + try { + createDocumentMapper( + fieldMapping( + b -> b.field("type", textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void testBWCSerialization() throws IOException {} + + @Override + public void testPositionIncrementGap() throws IOException {} + + @Override + public void testDefaultPositionIncrementGap() throws IOException {} + + @Override + public void testMinimalToMaximal() throws IOException {} + + @Override + public void testIndexPrefixMapping() throws IOException { + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping( + b -> b.field("type", textFieldName) + .field("analyzer", "standard") + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + ) + ) + ); + assertEquals( + "Failed to parse mapping [_doc]: Index prefixes cannot be enabled on for match_only_text field. Use text field instead", + e.getMessage() + ); + } + + @Override + public void testIndexPrefixIndexTypes() throws IOException { + // not supported and asserted the expected behavior in testIndexPrefixMapping + } + + @Override + public void testFastPhrasePrefixes() throws IOException { + // not supported and asserted the expected behavior in testIndexPrefixMapping + } + + public void testPhrasePrefixes() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field"); + { + b.field("type", textFieldName); + b.field("analyzer", "my_stop_analyzer"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + b.startObject("synfield"); + { + b.field("type", textFieldName); + b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "here")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "three")); + mqbFilter.add(new Term("field", "words")); + mqbFilter.add(new Term("field", "here")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.setSlop(1); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "singleton")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(mqb, BooleanClause.Occur.FILTER).build(), + mqb, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, is(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "stopword")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "sparkle")); + mqbFilter.add(new Term[] { new Term("field", "stopword") }, 2); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "motor")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "two")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.setSlop(1); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "three dogs word"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term("synfield", "word")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "three")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.add(new Term("synfield", "word")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + } + + @Override + public void testFastPhraseMapping() throws IOException { + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject("field") + .field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); + }))); + assertEquals( + "Failed to parse mapping [_doc]: Index phrases cannot be enabled on for match_only_text field. Use text field instead", + e.getMessage() + ); + } + + @Override + public void testSimpleMerge() throws IOException {} + + public void testPhraseQuery() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field").field("type", textFieldName).field("analyzer", "my_stop_analyzer").endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "two", "words"), + mapperService.fieldType("field"), + queryShardContext + ); + + assertThat(q, is(expectedQuery)); + Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); + + Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "here")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "three", "words", "here"), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q2, is(expectedQuery)); + + Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(2).toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery(2, "field", "two", "words"), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q3, is(expectedQuery)); + + Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "stopword")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build(), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q5, is(expectedQuery)); + + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs"); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .build(), + new MultiPhraseQuery.Builder().add(new Term("synfield", "motor")) + .add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }, 1) + .build(), + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q6, is(expectedQuery)); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java new file mode 100644 index 0000000000000..51234fa04ddc2 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.opensearch.common.lucene.Lucene; + +public class MatchOnlyTextFieldTypeTests extends TextFieldTypeTests { + + @Override + TextFieldMapper.TextFieldType createFieldType(boolean searchable) { + TextSearchInfo tsi = new TextSearchInfo( + TextFieldMapper.Defaults.FIELD_TYPE, + null, + Lucene.STANDARD_ANALYZER, + Lucene.STANDARD_ANALYZER + ); + return new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "field", + searchable, + false, + tsi, + ParametrizedFieldMapper.Parameter.metaParam().get() + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java index 93bed729f0974..83a3bdc580ae6 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java @@ -59,6 +59,9 @@ import static org.mockito.Mockito.when; public class TextFieldAnalyzerModeTests extends OpenSearchTestCase { + ParametrizedFieldMapper.TypeParser getTypeParser() { + return TextFieldMapper.PARSER; + } private static Map defaultAnalyzers() { Map analyzers = new HashMap<>(); @@ -101,7 +104,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); // check that "analyzer" set to something that only supports AnalysisMode.SEARCH_TIME or AnalysisMode.INDEX_TIME is blocked AnalysisMode mode = randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME); @@ -110,7 +113,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); fieldNode.put("analyzer", "my_analyzer"); - MapperException ex = expectThrows(MapperException.class, () -> { TextFieldMapper.PARSER.parse("name", fieldNode, parserContext); }); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("name", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -136,7 +139,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("textField", fieldNode, parserContext); + getTypeParser().parse("textField", fieldNode, parserContext); // check that "analyzer" set to AnalysisMode.INDEX_TIME is blocked mode = AnalysisMode.INDEX_TIME; @@ -151,10 +154,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { if (settingToTest.equals("search_quote_analyzer")) { fieldNode.put("search_analyzer", "standard"); } - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertEquals( "analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run in search time mode.", ex.getMessage() @@ -174,10 +174,7 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { analyzers.put("my_analyzer", new NamedAnalyzer("my_named_analyzer", AnalyzerScope.INDEX, createAnalyzerWithMode(mode))); IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -193,7 +190,6 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); } - } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index a9b902e121bda..a22bfa5e845b1 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -81,6 +81,7 @@ import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.search.MatchQuery; +import org.junit.Before; import java.io.IOException; import java.util.Arrays; @@ -95,6 +96,13 @@ public class TextFieldMapperTests extends MapperTestCase { + public String textFieldName = "text"; + + @Before + public void setup() { + textFieldName = "text"; + } + @Override protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.value(1234); @@ -169,30 +177,34 @@ protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerConflictCheck("index", b -> b.field("index", false)); checker.registerConflictCheck("store", b -> b.field("store", true)); - checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); - checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); - checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + if (!textFieldName.equals("match_only_text")) { + checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); + checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); + checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + } checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); checker.registerConflictCheck("term_vector", b -> b.field("term_vector", "yes")); checker.registerConflictCheck("position_increment_gap", b -> b.field("position_increment_gap", 10)); - // norms can be set from true to false, but not vice versa - checker.registerConflictCheck("norms", fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", false); - }), fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", true); - })); - checker.registerUpdateCheck(b -> { - b.field("type", "text"); - b.field("norms", true); - }, b -> { - b.field("type", "text"); - b.field("norms", false); - }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + if (!textFieldName.equals(MatchOnlyTextFieldMapper.CONTENT_TYPE)) { + // norms can be set from true to false, but not vice versa + checker.registerConflictCheck("norms", fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", false); + }), fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", true); + })); + checker.registerUpdateCheck(b -> { + b.field("type", textFieldName); + b.field("norms", true); + }, b -> { + b.field("type", textFieldName); + b.field("norms", false); + }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + } checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(m.fieldType().boost(), 2.0, 0)); @@ -237,7 +249,7 @@ public TokenStream create(TokenStream tokenStream) { @Override protected void minimalMapping(XContentBuilder b) throws IOException { - b.field("type", "text"); + b.field("type", textFieldName); } public void testDefaults() throws IOException { @@ -262,7 +274,7 @@ public void testDefaults() throws IOException { public void testBWCSerialization() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { - b.field("type", "text"); + b.field("type", textFieldName); b.field("fielddata", true); b.startObject("fields"); { @@ -312,7 +324,7 @@ public void testBWCSerialization() throws IOException { } public void testEnableStore() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("store", true))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -320,14 +332,14 @@ public void testEnableStore() throws IOException { } public void testDisableIndex() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("index", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); } public void testDisableNorms() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("norms", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("norms", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -343,7 +355,7 @@ public void testIndexOptions() throws IOException { XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); for (String option : supportedOptions.keySet()) { - mapping.startObject(option).field("type", "text").field("index_options", option).endObject(); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); } mapping.endObject().endObject().endObject(); @@ -389,7 +401,7 @@ public void testDefaultPositionIncrementGap() throws IOException { public void testPositionIncrementGap() throws IOException { final int positionIncrementGap = randomIntBetween(1, 1000); MapperService mapperService = createMapperService( - fieldMapping(b -> b.field("type", "text").field("position_increment_gap", positionIncrementGap)) + fieldMapping(b -> b.field("type", textFieldName).field("position_increment_gap", positionIncrementGap)) ); ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); @@ -409,16 +421,16 @@ public void testPositionIncrementGap() throws IOException { public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text").field("analyzer", "standard").field("search_analyzer", "keyword") + b -> b.field("type", textFieldName).field("analyzer", "standard").field("search_analyzer", "keyword") ); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default index analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "default").field("search_analyzer", "keyword")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "default").field("search_analyzer", "keyword")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default search analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "keyword").field("search_analyzer", "default")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "keyword").field("search_analyzer", "default")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); XContentBuilder builder = MediaTypeRegistry.JSON.contentBuilder(); @@ -436,7 +448,7 @@ public void testSearchAnalyzerSerialization() throws IOException { public void testSearchQuoteAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .field("search_analyzer", "standard") .field("search_quote_analyzer", "keyword") @@ -445,7 +457,7 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { // special case: default index/search analyzer mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "default") .field("search_analyzer", "default") .field("search_quote_analyzer", "keyword") @@ -456,27 +468,27 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { public void testTermVectors() throws IOException { XContentBuilder mapping = mapping( b -> b.startObject("field1") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "no") .endObject() .startObject("field2") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "yes") .endObject() .startObject("field3") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_offsets") .endObject() .startObject("field4") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions") .endObject() .startObject("field5") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets") .endObject() .startObject("field6") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets_payloads") .endObject() ); @@ -526,7 +538,9 @@ public void testTermVectors() throws IOException { } public void testEagerGlobalOrdinals() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("eager_global_ordinals", true))); + DocumentMapper mapper = createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("eager_global_ordinals", true)) + ); FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field"); assertTrue(fieldMapper.fieldType().eagerGlobalOrdinals()); @@ -539,13 +553,13 @@ public void testFielddata() throws IOException { })); assertThat(e.getMessage(), containsString("Text fields are not optimised for operations that require per-document field data")); - MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", "text").field("fielddata", true))); + MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("fielddata", true))); enabledMapper.fieldType("field").fielddataBuilder("test", () -> { throw new UnsupportedOperationException(); }); // no exception // this time e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", false).field("fielddata", true))) + () -> createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("fielddata", true))) ); assertThat(e.getMessage(), containsString("Cannot enable fielddata on a [text] field that is not indexed")); } @@ -553,7 +567,7 @@ public void testFielddata() throws IOException { public void testFrequencyFilter() throws IOException { MapperService mapperService = createMapperService( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("fielddata", true) .startObject("fielddata_frequency_filter") .field("min", 2d) @@ -571,17 +585,22 @@ public void testFrequencyFilter() throws IOException { public void testNullConfigValuesFail() throws MapperParsingException { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("analyzer", (String) null))) + () -> createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("analyzer", (String) null))) + ); + assertThat( + e.getMessage(), + containsString("[analyzer] on mapper [field] of type [" + textFieldName + "] must not have a [null] value") ); - assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [text] must not have a [null] value")); } public void testNotIndexedFieldPositionIncrement() { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false).field("position_increment_gap", 10))) + () -> createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("position_increment_gap", 10)) + ) ); - assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); + assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field]")); } public void testAnalyzedFieldPositionIncrementWithoutPositions() { @@ -589,7 +608,9 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { Exception e = expectThrows( MapperParsingException.class, () -> createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("index_options", indexOptions).field("position_increment_gap", 10)) + fieldMapping( + b -> b.field("type", textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) ) ); assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); @@ -600,7 +621,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -615,7 +636,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -632,7 +653,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -649,7 +670,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -666,7 +687,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -682,62 +703,18 @@ public void testIndexPrefixIndexTypes() throws IOException { } public void testNestedIndexPrefixes() throws IOException { - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("object") - .field("type", "object") - .startObject("properties") - .startObject("field") - .field("type", "text") - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - ); - MappedFieldType textField = mapperService.fieldType("object.field"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "object.field._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("object.field._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("body") - .field("type", "text") - .startObject("fields") - .startObject("with_prefix") - .field("type", "text") - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - ); - MappedFieldType textField = mapperService.fieldType("body.with_prefix"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "body.with_prefix._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("body.with_prefix._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } } public void testFastPhraseMapping() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { - b.startObject("field").field("type", "text").field("analyzer", "my_stop_analyzer").field("index_phrases", true).endObject(); + b.startObject("field") + .field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); // "standard" will be replaced with MockSynonymAnalyzer - b.startObject("synfield").field("type", "text").field("analyzer", "standard").field("index_phrases", true).endObject(); + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); })); QueryShardContext queryShardContext = createQueryShardContext(mapperService); @@ -808,14 +785,16 @@ protected TokenStreamComponents createComponents(String fieldName) { Exception e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true))) + () -> createMapperService( + fieldMapping(b -> b.field("type", textFieldName).field("index", "false").field("index_phrases", true)) + ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]")); e = expectThrows( MapperParsingException.class, () -> createMapperService( - fieldMapping(b -> b.field("type", "text").field("index_options", "freqs").field("index_phrases", true)) + fieldMapping(b -> b.field("type", textFieldName).field("index_options", "freqs").field("index_phrases", true)) ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled")); @@ -826,7 +805,7 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 2) @@ -844,29 +823,29 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("analyzer", "standard").startObject("index_prefixes").endObject()) + fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "standard").startObject("index_prefixes").endObject()) ); assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5")); } { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").nullField("index_prefixes"))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).nullField("index_prefixes"))); assertNull(mapper.mappers().getMapper("field._index_prefix")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 10).endObject(); - b.startObject("fields").startObject("_index_prefix").field("type", "text").endObject().endObject(); + b.startObject("fields").startObject("_index_prefix").field("type", textFieldName).endObject().endObject(); }))); assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined more than once")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 11).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [11] must be less than max_chars [10]")); @@ -874,7 +853,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 0).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [0] must be greater than zero")); @@ -882,7 +861,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 25).endObject(); }))); assertThat(e.getMessage(), containsString("max_chars [25] must be less than 20")); @@ -890,7 +869,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard").field("index", false); + b.field("type", textFieldName).field("analyzer", "standard").field("index", false); b.startObject("index_prefixes").endObject(); }))); assertThat(e.getMessage(), containsString("Cannot set index_prefixes on unindexed field [field]")); @@ -901,14 +880,14 @@ public void testFastPhrasePrefixes() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { b.startObject("field"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "my_stop_analyzer"); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); } b.endObject(); b.startObject("synfield"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer b.field("index_phrases", true); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); @@ -999,7 +978,7 @@ public void testFastPhrasePrefixes() throws IOException { public void testSimpleMerge() throws IOException { XContentBuilder startingMapping = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true) + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", true) ); MapperService mapperService = createMapperService(startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); @@ -1008,19 +987,28 @@ public void testSimpleMerge() throws IOException { assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); XContentBuilder differentPrefix = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").field("min_chars", "3").endObject().field("index_phrases", true) + b -> b.field("type", textFieldName) + .startObject("index_prefixes") + .field("min_chars", "3") + .endObject() + .field("index_phrases", true) ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPrefix)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_prefixes]")); XContentBuilder differentPhrases = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", false) + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", false) ); e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPhrases)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_phrases]")); XContentBuilder newField = mapping(b -> { - b.startObject("field").field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true).endObject(); + b.startObject("field") + .field("type", textFieldName) + .startObject("index_prefixes") + .endObject() + .field("index_phrases", true) + .endObject(); b.startObject("other_field").field("type", "keyword").endObject(); }); merge(mapperService, newField); diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java index 0592a972db5e9..9c177bbec61fd 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java @@ -66,35 +66,39 @@ public class TextFieldTypeTests extends FieldTypeTestCase { - private static TextFieldType createFieldType() { - return new TextFieldType("field"); + TextFieldType createFieldType(boolean searchabe) { + if (searchabe) { + return new TextFieldType("field"); + } else { + return new TextFieldType("field", false, false, Collections.emptyMap()); + } } public void testIsAggregatableDependsOnFieldData() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); assertFalse(ft.isAggregatable()); ft.setFielddata(true); assertTrue(ft.isAggregatable()); } public void testTermQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals(new TermQuery(new Term("field", "foo")), ft.termQuery("foo", null)); assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } public void testTermsQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); List terms = new ArrayList<>(); terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) @@ -103,7 +107,7 @@ public void testTermsQuery() { } public void testRangeQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) @@ -120,13 +124,13 @@ public void testRangeQuery() { } public void testRegexpQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) @@ -141,13 +145,13 @@ public void testRegexpQuery() { } public void testFuzzyQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) @@ -162,7 +166,7 @@ public void testFuzzyQuery() { } public void testIndexPrefixes() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10)); Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext()); @@ -222,7 +226,7 @@ public void testIndexPrefixes() { } public void testFetchSourceValue() throws IOException { - TextFieldType fieldType = createFieldType(); + TextFieldType fieldType = createFieldType(true); fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER); assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java new file mode 100644 index 0000000000000..6af717a97b328 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -0,0 +1,173 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.opensearch.core.index.Index; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MapperServiceTestCase; +import org.opensearch.index.mapper.ParsedDocument; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.mockito.Mockito.when; + +public class SourceFieldMatchQueryTests extends MapperServiceTestCase { + + public void testAllPossibleScenarios() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("dessert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); + } + SourceFieldMatchQuery matchBoth = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "juice").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchFilter = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "tart").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchNone = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "gulab").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "jamun").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchMultipleDocs = new SourceFieldMatchQuery( + QueryBuilders.matchAllQuery().toQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchBoth, 10); + assertEquals(topDocs.totalHits.value, 1); + assertEquals(topDocs.scoreDocs[0].doc, 0); + + topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchFilter, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchNone, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchMultipleDocs, 10); + assertEquals(topDocs.totalHits.value, 2); + // assert constant score + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertEquals(scoreDoc.score, 1.0, 0.00000000001); + } + } + } + } + + public void testSourceDisabled() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> b.startObject("_source").field("enabled", false).endObject())); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ) + ); + assertEquals( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: " + + "_source is disabled in the mappings for index [test_index]", + e.getMessage() + ); + } + + public void testMissingField() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("dessert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); + } + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("username", "pie").doToQuery(queryShardContext), // Filter query missing field + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + } + } + } +} diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java index 94c2e4ef7da62..ac78a0d1936ea 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java @@ -239,7 +239,7 @@ protected final XContentBuilder fieldMapping(CheckedConsumer mapperService.fieldType(inv.getArguments()[0].toString())); @@ -254,6 +254,8 @@ QueryShardContext createQueryShardContext(MapperService mapperService) { when(queryShardContext.lookup()).thenReturn(new SearchLookup(mapperService, (ft, s) -> { throw new UnsupportedOperationException("search lookup not available"); })); + when(queryShardContext.getFieldType(any())).thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString())); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); return queryShardContext; } } diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 82f15a590bea6..ac0447dbebf7e 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -103,6 +103,7 @@ import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.Mapper.BuilderContext; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ObjectMapper; import org.opensearch.index.mapper.ObjectMapper.Nested; @@ -760,7 +761,8 @@ public void testSupportedFieldTypes() throws IOException { source.put("type", mappedType.getKey()); // Text is the only field that doesn't support DVs, instead FD - if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false) { + if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false + && mappedType.getKey().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) { source.put("doc_values", "true"); }