From ba6b5164f65ec3f0f2493fc887b1afc18f393e2d Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Mon, 14 Oct 2024 13:59:28 +0100 Subject: [PATCH] add simple analyzer docs Signed-off-by: Anton Rubin --- _analyzers/simple.md | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 _analyzers/simple.md diff --git a/_analyzers/simple.md b/_analyzers/simple.md new file mode 100644 index 0000000000..9dce2d4c38 --- /dev/null +++ b/_analyzers/simple.md @@ -0,0 +1,70 @@ +--- +layout: default +title: Simple analyzer +nav_order: 50 +--- + +# Simple analyzer + +The `simple` analyzer is a very basic analyzer that breaks text into terms at non-letter characters and lowercases the terms. Unlike the `standard` analyzer, the `simple` analyzer treats everything except alphabetic characters as delimiters, meaning it does not recognize numbers, punctuation, or special characters as part of tokens. + +## Example configuration + +You can use the following command to create an index named `my_simple_index` that uses the `simple` analyzer: + +```json +PUT /my_simple_index +{ + "mappings": { + "properties": { + "my_field": { + "type": "text", + "analyzer": "simple" + } + } + } +} +``` +{% include copy-curl.html %} + +## Configuring a custom analyzer + +You can use the following command to configure an index named `my_custom_simple_index` with a custom analyzer that is equivalent to the `simple` analyzer but with an added `html_strip` character filter: + +```json +PUT /my_custom_simple_index +{ + "settings": { + "analysis": { + "char_filter": { + "html_strip": { + "type": "html_strip" + } + }, + "tokenizer": { + "my_lowercase_tokenizer": { + "type": "lowercase" + } + }, + "analyzer": { + "my_custom_simple_analyzer": { + "type": "custom", + "char_filter": ["html_strip"], + "tokenizer": "my_lowercase_tokenizer", + "filter": ["lowercase"] + } + } + } + }, + "mappings": { + "properties": { + "my_field": { + "type": "text", + "analyzer": "my_custom_simple_analyzer" + } + } + } +} +``` +{% 
include copy-curl.html %} +