From 81a5ed39d623bc6abafcc81711434e1ede21694c Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Tue, 15 Oct 2024 11:00:36 +0100 Subject: [PATCH 1/3] add keyword analyzer docs Signed-off-by: Anton Rubin --- _analyzers/keyword.md | 77 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 _analyzers/keyword.md diff --git a/_analyzers/keyword.md b/_analyzers/keyword.md new file mode 100644 index 0000000000..46f1a79d04 --- /dev/null +++ b/_analyzers/keyword.md @@ -0,0 +1,77 @@ +--- +layout: default +title: Keyword analyzer +nav_order: 80 +--- + +# Keyword analyzer + +The `keyword` analyzer doesn’t tokenize the text at all, but instead, it treats the entire input as a single token. This is useful when you want the entire content of a field to be indexed as-is, without breaking it into smaller pieces (tokens). The `keyword` analyzer is often used for fields like email addresses, URLs, product IDs, and other cases where tokenization is not desirable. + +## Example configuration + +You can use the following command to create index `my_keyword_index` with `keyword` analyzer: + +```json +PUT /my_keyword_index +{ + "mappings": { + "properties": { + "my_field": { + "type": "text", + "analyzer": "keyword" + } + } + } +} +``` +{% include copy-curl.html %} + +## Configuring custom analyzer + +You can use the following command to configure index `my_custom_keyword_index` with custom analyzer equivalent to `keyword` analyzer: + +```json +PUT /my_custom_keyword_index +{ + "settings": { + "analysis": { + "analyzer": { + "my_keyword_analyzer": { + "tokenizer": "keyword" + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the created analyzer: + +```json +POST /my_custom_keyword_index/_analyze +{ + "analyzer": "my_keyword_analyzer", + "text": "Just one token" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ 
+ "tokens": [ + { + "token": "Just one token", + "start_offset": 0, + "end_offset": 14, + "type": "word", + "position": 0 + } + ] +} +``` From ffb11c9c5eee6f92f364936a2d51c8cca846117c Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 6 Dec 2024 13:24:48 -0500 Subject: [PATCH 2/3] Doc review Signed-off-by: Fanit Kolchina --- _analyzers/keyword.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/_analyzers/keyword.md b/_analyzers/keyword.md index 46f1a79d04..7035fc6abd 100644 --- a/_analyzers/keyword.md +++ b/_analyzers/keyword.md @@ -6,11 +6,11 @@ nav_order: 80 # Keyword analyzer -The `keyword` analyzer doesn’t tokenize the text at all, but instead, it treats the entire input as a single token. This is useful when you want the entire content of a field to be indexed as-is, without breaking it into smaller pieces (tokens). The `keyword` analyzer is often used for fields like email addresses, URLs, product IDs, and other cases where tokenization is not desirable. +The `keyword` analyzer doesn’t tokenize the text at all. Instead, it treats the entire input as a single token. This is useful when you want the entire content of a field to be indexed as is, without breaking it into smaller pieces (tokens). The `keyword` analyzer is often used for fields like email addresses, URLs, product IDs, and other cases where tokenization is not desirable. 
-## Example configuration +## Example -You can use the following command to create index `my_keyword_index` with `keyword` analyzer: +Use the following command to create an index named `my_keyword_index` with a `keyword` analyzer: ```json PUT /my_keyword_index @@ -27,9 +27,9 @@ PUT /my_keyword_index ``` {% include copy-curl.html %} -## Configuring custom analyzer +## Configuring a custom analyzer -You can use the following command to configure index `my_custom_keyword_index` with custom analyzer equivalent to `keyword` analyzer: +Use the following command to configure an index with a custom analyzer that is equivalent to the `keyword` analyzer: ```json PUT /my_custom_keyword_index @@ -49,7 +49,7 @@ PUT /my_custom_keyword_index ## Generated tokens -Use the following request to examine the tokens generated using the created analyzer: +Use the following request to examine the tokens generated using the analyzer: ```json POST /my_custom_keyword_index/_analyze From c50938c179f86e59db2063293ebb530bc2f45a9d Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:54:10 -0500 Subject: [PATCH 3/3] Update _analyzers/keyword.md Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _analyzers/keyword.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/keyword.md b/_analyzers/keyword.md index 7035fc6abd..3aec99d1d4 100644 --- a/_analyzers/keyword.md +++ b/_analyzers/keyword.md @@ -6,7 +6,7 @@ nav_order: 80 # Keyword analyzer -The `keyword` analyzer doesn’t tokenize the text at all. Instead, it treats the entire input as a single token. This is useful when you want the entire content of a field to be indexed as is, without breaking it into smaller pieces (tokens). The `keyword` analyzer is often used for fields like email addresses, URLs, product IDs, and other cases where tokenization is not desirable. 
+The `keyword` analyzer doesn't tokenize text at all. Instead, it treats the entire input as a single token, so the content of the field is indexed as is rather than being split into smaller tokens. The `keyword` analyzer is often used for fields containing email addresses, URLs, or product IDs and in other cases where tokenization is not desirable. ## Example