diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d4374d27929..17fd0cfc60bfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -174,7 +174,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Change error message when per shard document limit is breached ([#11312](https://github.com/opensearch-project/OpenSearch/pull/11312)) - Automatically add scheme to discovery.ec2.endpoint ([#11512](https://github.com/opensearch-project/OpenSearch/pull/11512)) - Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562)) - +- Update `termQuery` in KeyWordField to be searchable through `doc_values` ([#11639](https://github.com/opensearch-project/OpenSearch/pull/11639)) ### Deprecated ### Removed diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml new file mode 100644 index 0000000000000..eff56c2979b82 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml @@ -0,0 +1,1178 @@ +setup: + - skip: + features: [ "headers" ] + version: " - 2.11.99" + reason: "searching with only doc_values was added in 2.12.0" +--- +"search on fields with both index and doc_values enabled": + - do: + indices.create: + index: test-iodvq + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: true + byte: + type: byte + index: true + doc_values: true + double: + type: double + index: true + doc_values: true + float: + type: float + index: true + doc_values: true + half_float: + type: half_float + index: true + doc_values: true + integer: + type: integer + index: true + doc_values: true + long: + type: long + index: true + doc_values: true + short: + type: short + index: true + doc_values: true + unsigned_long: + type: unsigned_long + index: true + doc_values: true + + - do: + bulk: + index: test-iodvq + refresh: true + body: + - '{"index": {"_index": "test-iodvq", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-iodvq", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-iodvq", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: "400" + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only index enabled": + - do: + indices.create: + index: test-index + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: false + byte: + type: byte + index: true + doc_values: false + double: + type: double + index: true + doc_values: false + float: + type: float + index: true + doc_values: false + half_float: + type: half_float + index: true + doc_values: false + integer: + type: integer + index: true + doc_values: false + long: + type: long + index: true + doc_values: false + short: + type: short + index: true + doc_values: false + unsigned_long: + type: unsigned_long + index: true + doc_values: false + + - do: + bulk: + index: test-index + refresh: true + body: + - '{"index": {"_index": "test-index", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-index", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-index", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: "400" + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only doc_values enabled": + - do: + indices.create: + index: test-doc-values + body: + mappings: + properties: + some_keyword: + type: keyword + index: false + doc_values: true + byte: + type: byte + index: false + doc_values: true + double: + type: double + index: false + doc_values: true + float: + type: float + index: false + doc_values: true + half_float: + type: half_float + index: false + doc_values: true + integer: + type: integer + index: false + doc_values: true + long: + type: long + index: false + doc_values: true + short: + type: short + index: false + doc_values: true + unsigned_long: + type: unsigned_long + index: false + doc_values: true + + - do: + bulk: + index: test-doc-values + refresh: true + body: + - '{"index": {"_index": "test-doc-values", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-doc-values", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-doc-values", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: "400" + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml deleted file mode 100644 index 8829e7b100fdd..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -"search on keyword fields with doc_values enabled": - - do: - indices.create: - index: test - body: - mappings: - properties: - "some_keyword": - type: "keyword" - index: true - doc_values: true - - - do: - bulk: - index: test - refresh: true - body: - - '{"index": {"_index": "test", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data" }' - - '{ "index": { "_index": "test", "_id": "2" }}' - - '{ "some_keyword": "400" }' - - '{ "index": { "_index": "test", "_id": "3" } }' - - '{ "some_keyword": "5" }' - - - do: - search: - index: test - body: - query: - prefix: - some_keyword: "ing" - - - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } - - - do: - search: - index: test - body: - query: - range: { - "some_keyword": { - "lt": 500 - } } - - - match: { hits.total.value: 2 } diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index c14b2c92c89c3..61981cd69d617 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -387,6 +387,21 @@ protected BytesRef indexedValueForSearch(Object value) { return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); } + @Override + public Query termQuery(Object value, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + return new IndexOrDocValuesQuery( + super.termQuery(value, context), + SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)) + ); + } + if (hasDocValues()) { + return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)); + } + return super.termQuery(value, context); + } + @Override public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index 393c448330142..c1f87e75574bc 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -41,6 +41,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FuzzyQuery; @@ -102,11 +103,17 @@ public void testIsFieldWithinQuery() throws IOException { public void testTermQuery() { MappedFieldType ft = new KeywordFieldType("field"); - assertEquals(new TermQuery(new Term("field", "foo")), ft.termQuery("foo", null)); + assertEquals( + new IndexOrDocValuesQuery( + new TermQuery(new Term("field", "foo")), + SortedSetDocValuesField.newSlowExactQuery("field", new BytesRef("foo")) + ), + ft.termQuery("foo", null) + ); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testTermQueryWithNormalizer() {