From cc7651ee3ba1dd365e72a38a996ab59b45bfef00 Mon Sep 17 00:00:00 2001 From: Greg Schohn Date: Mon, 2 Dec 2024 10:58:05 -0500 Subject: [PATCH] Further work to support bulk delete, index create with type mapping migration, and regex target index patterns. Fix regex replace to use java's regex classes instead of the builtin ones. The builtin filter uses Google's re2j library, which doesn't do backreferences. Now both capture and replace use custom filters that use the builtin java regex library. Tests are in place to do some index remapping w/ backreferenced captures. For bulk APIs, only the delete command is fully supported, but the structure for the others should be in place. I've also worked on improving the test for creating an index and fixed numerous issues there. It's also CRITICAL to note that constructing dictionaries in jinjava with keys that are defined via variables is NOT SUPPORTED. See https://github.com/HubSpot/jinjava/issues/379. The workaround that I'm currently using is to construct a map as a string in json and parse it into a map - or to construct maps dynamically. Also note that python dictionary operations like 'update' or 'delete' are NOT present since we don't have the luxury of overriding the dictionary implementation easily. This is also something that has a significant impact on the ease of use for jinjava and to maintain compatibility to the python implementation. Signed-off-by: Greg Schohn --- .../transform/JinjavaTransformer.java | 15 ++- ...ilter.java => JavaRegexCaptureFilter.java} | 2 +- .../jinjava/JavaRegexReplaceFilter.java | 50 ++++++++ .../transform/jinjava/ThrowTag.java | 46 +++++++ .../jinjava/common/featureEnabled.j2 | 4 +- .../main/resources/jinjava/common/route.j2 | 9 +- .../build.gradle | 2 + .../jinjava/typeMappings/makeNoop.j2 | 3 + .../jinjava/typeMappings/preserveAll.j2 | 3 + .../jinjava/typeMappings/replayer.j2 | 101 ++------------- .../typeMappings/rewriteBulkRequest.j2 | 76 ++++++++++++ .../typeMappings/rewriteCreateIndexRequest.j2 | 58 +++++++++ .../typeMappings/rewriteDocumentRequest.j2 | 15 +++ .../typeMappings/rewriteIndexForTarget.j2 | 24 ++++ ...ppingsSanitizationTransformerBulkTest.java | 98 +++++++++++++++ ...peMappingsSanitizationTransformerTest.java | 116 ++++++++++++------ 16 files changed, 479 insertions(+), 143 deletions(-) rename transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/{RegexCaptureFilter.java => JavaRegexCaptureFilter.java} (96%) create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexReplaceFilter.java create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/ThrowTag.java create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/makeNoop.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/preserveAll.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteBulkRequest.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteCreateIndexRequest.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2 create mode 100644 transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerBulkTest.java diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/JinjavaTransformer.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/JinjavaTransformer.java index 3d5c45057..c610b935b 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/JinjavaTransformer.java +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/JinjavaTransformer.java @@ -8,7 +8,9 @@ import org.opensearch.migrations.transform.jinjava.DynamicMacroFunction; import org.opensearch.migrations.transform.jinjava.NameMappingClasspathResourceLocator; -import org.opensearch.migrations.transform.jinjava.RegexCaptureFilter; +import org.opensearch.migrations.transform.jinjava.JavaRegexCaptureFilter; +import org.opensearch.migrations.transform.jinjava.JavaRegexReplaceFilter; +import org.opensearch.migrations.transform.jinjava.ThrowTag; import com.fasterxml.jackson.databind.ObjectMapper; import com.hubspot.jinjava.Jinjava; @@ -38,17 +40,18 @@ public JinjavaTransformer(String templateString, jinjava = new Jinjava(); this.createContextWithSourceFunction = createContextWithSource; jinjava.setResourceLocator(resourceLocator); - jinjava.getGlobalContext().registerFilter(new RegexCaptureFilter()); - var dynamicMacroFunction = new ELFunctionDefinition( + jinjava.getGlobalContext().registerFilter(new JavaRegexCaptureFilter()); + jinjava.getGlobalContext().registerFilter(new JavaRegexReplaceFilter()); + + jinjava.getGlobalContext().registerFunction(new ELFunctionDefinition( "", "invoke_macro", DynamicMacroFunction.class, "invokeMacro", String.class, Object[].class - ); - - jinjava.getGlobalContext().registerFunction(dynamicMacroFunction); + )); + jinjava.getGlobalContext().registerTag(new ThrowTag()); this.templateStr = templateString; } diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/RegexCaptureFilter.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexCaptureFilter.java similarity index 96% rename from transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/RegexCaptureFilter.java rename to transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexCaptureFilter.java index 7e1cbdd6b..5fbd6610d 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/RegexCaptureFilter.java +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexCaptureFilter.java @@ -12,7 +12,7 @@ import com.hubspot.jinjava.lib.filter.Filter; import lombok.SneakyThrows; -public class RegexCaptureFilter implements Filter { +public class JavaRegexCaptureFilter implements Filter { private static LoadingCache regexCache = CacheBuilder.newBuilder().build(CacheLoader.from((Function)Pattern::compile)); diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexReplaceFilter.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexReplaceFilter.java new file mode 100644 index 000000000..fe36364bf --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/JavaRegexReplaceFilter.java @@ -0,0 +1,50 @@ +package org.opensearch.migrations.transform.jinjava; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.google.common.base.Function; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.hubspot.jinjava.interpret.JinjavaInterpreter; +import com.hubspot.jinjava.lib.filter.Filter; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class JavaRegexReplaceFilter implements Filter { + + private static LoadingCache regexCache = + CacheBuilder.newBuilder().build(CacheLoader.from((Function)Pattern::compile)); + + @SneakyThrows + private static Pattern getCompiledPattern(String pattern) { + return regexCache.get(pattern); + } + + @Override + public String getName() { + return "regex_replace"; + } + + @Override + public Object filter(Object var, JinjavaInterpreter interpreter, String... args) { + if (var == null || args.length < 2) { + return null; + } + + String input = var.toString(); + String pattern = args[0]; + String replacement = args[1]; + + try { + Matcher matcher = getCompiledPattern(pattern).matcher(input); + var rval = matcher.replaceAll(replacement); + log.atError().setMessage("replaced value {}").addArgument(rval).log(); + return rval; + } catch (Exception e) { + return null; + } + } +} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/ThrowTag.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/ThrowTag.java new file mode 100644 index 000000000..b4e314181 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/java/org/opensearch/migrations/transform/jinjava/ThrowTag.java @@ -0,0 +1,46 @@ +package org.opensearch.migrations.transform.jinjava; + +import com.hubspot.jinjava.doc.annotations.JinjavaDoc; +import com.hubspot.jinjava.doc.annotations.JinjavaParam; +import com.hubspot.jinjava.doc.annotations.JinjavaSnippet; +import com.hubspot.jinjava.interpret.JinjavaInterpreter; +import com.hubspot.jinjava.lib.tag.Tag; +import com.hubspot.jinjava.tree.TagNode; + +@JinjavaDoc( + value = "Throws a runtime exception with the specified message", + params = { + @JinjavaParam(value = "message", type = "string", desc = "The error message to throw") + }, + snippets = { + @JinjavaSnippet( + code = "{% throw 'Invalid input provided' %}" + ) + } +) + +public class ThrowTag implements Tag { + private static final String TAG_NAME = "throw"; + + @Override + public String getName() { + return TAG_NAME; + } + + @Override + public String interpret(TagNode tagNode, JinjavaInterpreter interpreter) { + String message = interpreter.render(tagNode.getHelpers().trim()); + throw new JinjavaThrowTagException(message); + } + + public static class JinjavaThrowTagException extends RuntimeException { + public JinjavaThrowTagException(String message) { + super(message); + } + } + + @Override + public String getEndTagName() { + return null; + } +} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/featureEnabled.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/featureEnabled.j2 index 2d44f468c..a7c2f6222 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/featureEnabled.j2 +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/featureEnabled.j2 @@ -5,8 +5,8 @@ {%- set ns = namespace(value=features) -%} {%- set debug = namespace(log=[]) -%} {%- for key in (path | split('.')) -%} - {% set debug.log = debug.log + ["k:"+key] -%} - {% set debug.log = debug.log + ["ismapping?:"+(ns.value is mapping)] -%} + {%- set debug.log = debug.log + ["k:"+key] -%} + {%- set debug.log = debug.log + ["ismapping?:"+(ns.value is mapping)] -%} {%- if ns.value is mapping and key in ns.value -%} {%- set ns.value = ns.value[key] -%} {%- else -%} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/route.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/route.j2 index bc4793a4d..47543b7ca 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/route.j2 +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonJinjavaTransformer/src/main/resources/jinjava/common/route.j2 @@ -1,8 +1,9 @@ -{% import "common/featureEnabled.j2" as fscope %} -{% macro route(input, field_to_match, feature_flags, default_action, routes) %} +{%- import "common/featureEnabled.j2" as fscope -%} +{%- import "common/featureEnabled.j2" as fscope -%} +{%- macro route(input, field_to_match, feature_flags, default_action, routes) -%} {%- set ns = namespace(result=none, matched=false) -%} {%- for pattern, action_fn, feature_name_param in routes if not ns.matched -%} - {% set feature_name = feature_name_param | default(action_fn) %} + {%- set feature_name = feature_name_param | default(action_fn) -%} {%- if not ns.matched -%} {# we haven't found a match yet, otherwise skip the rest #} {%- set match = field_to_match | regex_capture(pattern) -%} {%- if match is not none -%} @@ -18,4 +19,4 @@ {%- else -%} {{- ns.result -}} {%- endif -%} -{% endmacro %} \ No newline at end of file +{%- endmacro -%} \ No newline at end of file diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/build.gradle b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/build.gradle index 05540ced8..f0912fded 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/build.gradle +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/build.gradle @@ -13,7 +13,9 @@ dependencies { testImplementation testFixtures(project(path: ':testHelperFixtures')) testImplementation testFixtures(project(path: ':TrafficCapture:trafficReplayer')) + testImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind' testImplementation group: 'com.google.guava', name: 'guava' + testImplementation group: 'org.hamcrest', name: 'hamcrest' testImplementation group: 'org.junit.jupiter', name:'junit-jupiter-api' testImplementation group: 'org.junit.jupiter', name:'junit-jupiter-params' testImplementation group: 'org.slf4j', name: 'slf4j-api' diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/makeNoop.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/makeNoop.j2 new file mode 100644 index 000000000..e34232799 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/makeNoop.j2 @@ -0,0 +1,3 @@ +{%- macro make_request() -%} + { "method": "GET", "URI": "/" } +{%- endmacro -%} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/preserveAll.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/preserveAll.j2 new file mode 100644 index 000000000..009f4b094 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/preserveAll.j2 @@ -0,0 +1,3 @@ +{%- macro make_keep_json(source_and_mappings) -%} + { "preserve": "*" } +{%- endmacro -%} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/replayer.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/replayer.j2 index 29910a959..f6cbdcdd2 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/replayer.j2 +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/replayer.j2 @@ -1,99 +1,20 @@ -{%- include "common/featureEnabled.j2" -%} - -{%- macro preserve_original(source_and_mappings) -%} - { "preserve": "*" } -{%- endmacro -%} - -{%- macro make_no_op() -%} - { "method": "GET", "URI": "/" } -{%- endmacro -%} - -{%- macro rewrite_uri_to_strip_types(source_index, source_type, regex_index_mappings) -%} - {%- set ns = namespace(target_index=none) -%} - {%- for entry in regex_index_mappings -%} - {%- set idx_regex = entry.get(0) -%} - {%- set type_regex = entry[1] -%} - {%- set target_idx_pattern = entry[2] -%} - - {%- if ns.target_index is none -%} - {%- set conjoined_source = source_index + "/" + source_type -%} - {%- set conjoined_regex = idx_regex + "/" + type_regex -%} - {%- set didMatch = conjoined_source | regex_capture(conjoined_regex) -%} - {%- if didMatch is not none -%} - {%- set ns.target_index = conjoined_source | regex_replace(conjoined_regex, target_idx_pattern) -%} - {%- endif -%} - {%- endif -%} - {%- endfor -%} - {{- ns.target_index -}} -{%- endmacro -%} - -{%- macro rewrite_uri_for_types(match, input_map) -%} - {%- set target_index = (input_map.index_mappings[match.group1] | default({}))[match.group2] -%} - {%- if target_index is none %} {# not sure if default arguments would be eagerly evaluated #} - {%- set target_index = invoke_macro('rewrite_uri_to_strip_types', match.group1, match.group2, input_map.regex_index_mappings) -%} - {%- endif -%} - {%- if target_index is none -%} - {{ make_no_op() }} - {%- else -%} - { - "method": "{{ input_map.request.method }}", - "URI": "/{{ target_index }}/_doc/{{ match.group3 }}", - "preserve": ["headers","payload"] - } - {%- endif -%} -{%- endmacro -%} - -{% macro rewrite_create_index_excise(match, input_map) -%} -{ - "method": "{{ input_map.request.method }}", - "URI": "{{ input_map.index_mappings[match.group1] }}", - "payload": { - "inlinedJsonBody": { - {%- for key, value in input_map.request.body.items() -%} - {%- if key != "mappings" -%} - "{{ key }}": {{ value | tojson }}, - {%- endif -%} - {%- endfor -%} - "mappings": { - "properties": { - "type": { - "type": "keyword" - } - {%- for type_name, type_props in input_map.request.body.mappings.items() -%} - {%- for prop_name, prop_def in type_props.properties.items() -%} - , - "{{- prop_name -}}": {{- prop_def | tojson -}} - {%- endfor -%} - {%- endfor -%} - } - } - } - } -} -{%- endmacro -%} +{%- import "common/route.j2" as rscope -%} +{%- import "typeMappings/preserveAll.j2" as preserve -%} +{%- include "typeMappings/rewriteDocumentRequest.j2" -%} +{%- include "typeMappings/rewriteBulkRequest.j2" -%} +{%- include "typeMappings/rewriteCreateIndexRequest.j2" -%} -{% macro rewrite_create_index(match, input_map) -%} - {% set target_mappings = input_map.index_mappings[match.group1] | default({}) | length %} - {% if target_mappings == 0 %} - {{ make_no_op() }} - {% elif num_mappings == 1 %} - {{ rewrite_create_index_excise(match, input_map) }} - {% elif num_mappings > 1 %} - {% else %} - {{ preserve_original(input_mappings) }} - {% endif %} -{%- endmacro -%} -{% set source_and_mappings = { +{%- set source_and_mappings = { 'request': request, 'index_mappings': index_mappings, 'regex_index_mappings': regex_index_mappings} -%} -{%- import "common/route.j2" as rscope -%} -{{- rscope.route(source_and_mappings, request.method + " " + request.URI, flags, 'preserve_original', +-%} +{{- rscope.route(source_and_mappings, request.method + " " + request.URI, flags, 'make_keep_json', [ - ('(?:PUT|POST) /([^/]*)/([^/]*)/(.*)', 'rewrite_uri_for_types', 'rewrite_add_request_to_strip_types'), - ( 'GET /([^/]*)/([^/]*)/.*', 'rewrite_uri_for_types', 'rewrite_get_request_to_strip_types'), + ('(?:PUT|POST) /([^/]*)/([^/]*)/(.*)', 'rewrite_doc_request', 'rewrite_add_request_to_strip_types'), + ( 'GET /([^/]*)/([^/]*)/.*', 'rewrite_doc_request', 'rewrite_get_request_to_strip_types'), + ('(?:PUT|POST) /_bulk', 'rewrite_bulk', 'rewrite_bulk'), ('(?:PUT|POST) /([^/]*)', 'rewrite_create_index', 'rewrite_create_index') ]) -}} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteBulkRequest.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteBulkRequest.j2 new file mode 100644 index 000000000..c611a3178 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteBulkRequest.j2 @@ -0,0 +1,76 @@ +{%- include "typeMappings/rewriteIndexForTarget.j2" -%} +{%- import "typeMappings/rewriteIndexForTarget.j2" as transidx -%} + +{%- macro run_create(command, target_index, doc) -%} +{%- endmacro -%} +{%- macro run_index(command, target_index, doc) -%} +{%- endmacro -%} + +{%- macro run_delete(command, target_index) -%} + {%- if target_index -%} + {%- set ns = namespace(delete_inner={}) -%} + {%- for key, value in command.items() -%} + {%- if key != '_type' and key != '_index' -%} + {%- set inner_json = value | tojson -%} + {%- set jsonblob = ("{\"" + key + "\":" + inner_json + "}") | fromjson -%} + {%- set ns.delete_inner = ns.delete_inner + jsonblob -%} + {%- endif -%} + {%- endfor -%} + {%- set index_json = target_index | tojson -%} + {%- set index_blob = ("{\"_index\":" + index_json + "}") | fromjson -%} + {%- set ns.delete_inner = ns.delete_inner + index_blob -%} + {%- set final_json = ("{\"delete\":" + (ns.delete_inner | tojson) + "}") | fromjson -%} + {{ final_json | tojson }} + {%- endif -%} +{%- endmacro -%} + +{%- macro run_update(command, target_index, doc) -%} +{%- endmacro -%} +{%- macro rewrite_bulk_for_default_source_index(uri_match, input_map, source_index) -%} +{ + "preserve": ["headers","method","URI","protocol"], + "payload": { + "inlinedJsonSequenceBodies": [ + {%- set operation_types = ['delete', 'update', 'index', 'create'] -%} + {%- for item in input_map.request.payload.inlinedJsonSequenceBodies -%} + {%- set operation = namespace(type=None) -%} + {%- for type in operation_types -%} + {%- if item is mapping and type in item -%} + {%- set operation.type = type -%} + {%- endif -%} + {%- endfor -%} + + {%- if operation.type is not none -%} + {%- set command = item[operation.type] -%} + {%- set target_index = transidx.convert_source_index_to_target(command['_index'], command['_type'], input_map.index_mappings, input_map.regex_index_mappings) -%} +{# command['_index'] {{ command['_index'] }}, command['_type'] = {{ command['_type'] }}, input_map.index_mappings = {{ input_map.index_mappings }}, input_map.regex_index_mappings = {{ input_map.regex_index_mappings }})#} + {%- if operation.type == 'delete' -%} + {{ run_delete(command, target_index) }} + {%- else -%} + {%- if loop.index < operations|length -%} + {%- set next_item = operations[loop.index] -%} + {%- if operation.type == 'create' -%} + {{ run_create(command, target_index, next_item) }} + {%- elif operation.type == 'update' -%} + {{ run_update(command, target_index, next_item) }} + {%- elif operation.type == 'index' -%} + {{ run_index(command, target_index, next_item) }} + {%- endif -%} + {%- set loop.index = loop.index + 1 -%} + {%- else -%} + Handle case where there's no next item but one was expected + {# {{ throw_error('Expected document after ' + operation.type + ' operation') }}#} + {%- endif -%} + {%- endif -%} + {%- else -%} + Handle case where no valid operation type was found + {# {{ throw_error('Invalid operation type in item: ' + item|string) }}#} + {%- endif -%} + {%- endfor -%} + ] + } +} +{%- endmacro -%} +{%- macro rewrite_bulk(match, input_map) -%} + {{ rewrite_bulk_for_default_source_index(match, input_map, none) }} +{%- endmacro -%} \ No newline at end of file diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteCreateIndexRequest.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteCreateIndexRequest.j2 new file mode 100644 index 000000000..498454e57 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteCreateIndexRequest.j2 @@ -0,0 +1,58 @@ +{%- import "typeMappings/makeNoop.j2" as noop -%} +{%- import "typeMappings/preserveAll.j2" as preserve -%} + +{%- macro rewrite_create_index_as_unioned_excise(source_index_name, target_index_name, input_map) -%} + {%- set source_input_types = input_map.index_mappings[source_index_name] -%} + {%- set source_type_name = source_input_types.keys() | first() -%} + { + "method": "{{ input_map.request.method }}", + "URI": "/{{ target_index_name }}", + "payload": { + "inlinedJsonBody": { + {%- for key, value in input_map.request.payload.inlinedJsonBody.items() -%} + {%- if key != "mappings" -%} + "{{ key }}": {{ value | tojson }}, + {%- endif -%} + {%- endfor -%} + "mappings": { + "properties": { + {%- set ns = namespace(combined_props={"type": "keyword"}) -%} + {%- for source_type_name in source_input_types.keys() -%} + {%- set type_props = input_map.request.payload.inlinedJsonBody.mappings.get(source_type_name) -%} + {%- for prop_name, prop_def in type_props.properties.items() -%} + {%- if prop_name in ns.combined_props -%} + {%- if ns.combined_props[prop_name] != prop_def -%} + {%- throw "Conflicting definitions for property {{ prop_name }} ({{ ns.combined_props[prop_name] }} and {{ prop_def }})" -%} + {%- endif -%} + {%- else -%} + {%- set body = prop_def | tojson -%} + {%- set jsonblob = ("{\"" + prop_name + "\":" + body + "}") | fromjson -%} + {%- set ns.combined_props = ns.combined_props + jsonblob -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + + {%- for prop_name, prop_def in ns.combined_props.items() -%} + "{{- prop_name -}}": {{- prop_def | tojson -}}, + {%- endfor -%} + + "type": { "type": "keyword" } + } + } + } + } + } +{%- endmacro -%} +{%- macro rewrite_create_index(match, input_map) -%} + {%- set source_index_name = match.group1 -%} + {%- set target_indices = (input_map.index_mappings[source_index_name] | default({})).values() | unique() -%} + {%- set num_mappings = target_indices | length -%} + {%- if num_mappings == 0 -%} + {{- noop.make_request() -}} + {%- elif num_mappings == 1 -%} + {{- rewrite_create_index_as_unioned_excise(source_index_name, (target_indices | first), input_map) -}} + {%- elif num_mappings > 1 -%} + {# Need to extend the replayer to allow multiple requests since this needs to become multiple requests #} + {{- preserve.make_keep_json(input_mappings) -}} + {%- endif -%} +{%- endmacro -%} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2 new file mode 100644 index 000000000..f35fb3f0d --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2 @@ -0,0 +1,15 @@ +{%- import "typeMappings/makeNoop.j2" as noop -%} +{%- import "typeMappings/rewriteIndexForTarget.j2" as transidx -%} + +{%- macro rewrite_doc_request(match, input_map) -%} + {%- set target_index = transidx.convert_source_index_to_target(match.group1, match.group2, input_map.index_mappings, input_map.regex_index_mappings) -%} + {%- if target_index is none -%} + {{- noop.make_request() -}} + {%- else -%} + { + "method": "{{ input_map.request.method }}", + "URI": "/{{ target_index }}/_doc/{{ match.group3 }}", + "preserve": ["headers","payload"] + } + {%- endif -%} +{%- endmacro -%} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2 new file mode 100644 index 000000000..588d809ca --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2 @@ -0,0 +1,24 @@ +{%- macro convert_source_index_to_target_via_regex(source_index, source_type, regex_index_mappings) -%} + {%- set ns = namespace(target_index=none) -%} + {%- for idx_regex, type_regex, target_idx_pattern in regex_index_mappings -%} + {%- if ns.target_index is none -%} + {%- set conjoined_source = source_index + "/" + source_type -%} + {%- set conjoined_regex = idx_regex + "/" + type_regex -%} + {%- set didMatch = conjoined_source | regex_capture(conjoined_regex) -%} + {%- if didMatch is not none -%} +{# conjoined_source = {{ conjoined_source }} conjoined_regex {{ conjoined_regex }} target_idx_pattern = {{ target_idx_pattern }}#} + {%- set ns.target_index = conjoined_source | regex_replace(conjoined_regex, target_idx_pattern) -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {{- ns.target_index -}} +{%- endmacro -%} + +{%- macro convert_source_index_to_target(source_index, source_type, index_mappings, regex_index_mappings) -%} + {%- set ns = namespace(target_index=none) -%} + {%- set ns.target_index2 = (index_mappings[source_index] | default({}))[source_type] -%} + {%- if ns.target_index2 is none -%} + {%- set ns.target_index2 = convert_source_index_to_target_via_regex(source_index, source_type, regex_index_mappings) -%} + {%- endif -%} + {{ ns.target_index2 }} +{%- endmacro -%} \ No newline at end of file diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerBulkTest.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerBulkTest.java new file mode 100644 index 000000000..bb7215461 --- /dev/null +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerBulkTest.java @@ -0,0 +1,98 @@ +package org.opensearch.migrations.transform; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.hamcrest.Matchers.*; + +@Slf4j +public class TypeMappingsSanitizationTransformerBulkTest { + + private final static ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static TypeMappingsSanitizationTransformer indexTypeMappingRewriter; + @BeforeAll + static void initialize() throws IOException { + var indexMappings = Map.of( + "indexA", Map.of( + "type1", "indexA_1", + "type2", "indexA_2"), + "indexB", Map.of( + "type1", "indexB", + "type2", "indexB"), + "indexC", Map.of( + "type2", "indexC")); + var regexIndexMappings = List.of( + List.of("time-(.*)", "(.*)", "time-$1-$2")); + indexTypeMappingRewriter = new TypeMappingsSanitizationTransformer(indexMappings, regexIndexMappings); + } + + @Test + public void testBulk() throws Exception { + var testString = + "{\n" + + " \"" + JsonKeysForHttpMessage.METHOD_KEY + "\": \"PUT\",\n" + + " \"" + JsonKeysForHttpMessage.PROTOCOL_KEY + "\": \"HTTP/1.1\",\n" + + " \"" + JsonKeysForHttpMessage.URI_KEY + "\": \"/_bulk\",\n" + + " \"" + JsonKeysForHttpMessage.HEADERS_KEY + "\": {},\n" + + " \"" + JsonKeysForHttpMessage.PAYLOAD_KEY + "\": {\n" + + " \"" + JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY + "\": [\n" + +// "{ \"index\" : { \"_index\" : \"test\", \"_type\" : \"type1\", \"_id\" : \"1\" } },\n" + +// "{ \"field1\" : \"value1\" },\n" + + "{ \"delete\" : { \"_index\" : \"test\", \"_type\" : \"type1\", \"_id\" : \"2\" } },\n" + + "{ \"delete\" : { \"_index\" : \"time-January_1970\", \"_type\" : \"cpu\", \"_id\" : \"8\" } }\n" + +// "{ \"create\" : { \"_index\" : \"test\", \"_type\" : \"type1\", \"_id\" : \"3\" } },\n" + +// "{ \"field1\" : \"value3\" },\n" + +// "{ \"update\" : {\"_id\" : \"1\", \"_type\" : \"type1\", \"_index\" : \"test\"} },\n" + +// "{ \"doc\" : {\"field2\" : \"value2\"} }\n" + + " ]\n" + + " }\n" + + "}"; + + var expectedString = + "{\n" + + " \"" + JsonKeysForHttpMessage.METHOD_KEY + "\": \"PUT\",\n" + + " \"" + JsonKeysForHttpMessage.PROTOCOL_KEY + "\": \"HTTP/1.1\",\n" + + " \"" + JsonKeysForHttpMessage.URI_KEY + "\": \"/_bulk\",\n" + + " \"" + JsonKeysForHttpMessage.HEADERS_KEY + "\": {},\n" + + " \"" + JsonKeysForHttpMessage.PAYLOAD_KEY + "\": {\n" + + " \"" + JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY + "\": [\n" + +// "{ \"index\" : { \"_index\" : \"test\", \"_type\" : \"type1\", \"_id\" : \"1\" } },\n" + +// "{ \"field1\" : \"value1\" },\n" + + "{ \"delete\" : { \"_index\" : \"time-January_1970-cpu\", \"_id\" : \"8\" } }\n" + +// "{ \"create\" : { \"_index\" : \"test\", \"_type\" : \"type1\", \"_id\" : \"3\" } },\n" + +// "{ \"field1\" : \"value3\" },\n" + +// "{ \"update\" : {\"_id\" : \"1\", \"_type\" : \"type1\", \"_index\" : \"test\"} },\n" + +// "{ \"doc\" : {\"field2\" : \"value2\"} }\n" + + " ]\n" + + " }\n" + + "}"; + + + var resultObj = indexTypeMappingRewriter.transformJson(OBJECT_MAPPER.readValue(testString, LinkedHashMap.class)); + log.atInfo().setMessage("resultStr = {}").addArgument(() -> { + try { + return OBJECT_MAPPER.writeValueAsString(resultObj); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }).log(); + Assertions.assertEquals(normalize(OBJECT_MAPPER.readValue(expectedString, LinkedHashMap.class)), normalize(resultObj)); + } + + static String normalize(Object obj) throws Exception { + return new ObjectMapper() + .configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true) + .configure(SerializationFeature.INDENT_OUTPUT, true) + .writeValueAsString(obj); + } +} diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerTest.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerTest.java index bd57a153f..216b69e94 100644 --- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerTest.java +++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformerTest.java @@ -1,17 +1,21 @@ package org.opensearch.migrations.transform; import java.io.IOException; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper; @Slf4j class TypeMappingsSanitizationTransformerTest { + private final static ObjectMapper objMapper = new ObjectMapper(); private static TypeMappingsSanitizationTransformer indexTypeMappingRewriter; @BeforeAll @@ -23,8 +27,9 @@ static void initialize() throws IOException { "indexB", Map.of( "type1", "indexB", "type2", "indexB"), - "indexC", Map.of( - "type2", "indexC")); + "socialTypes", Map.of( + "tweet", "communal", + "user", "communal")); var regexIndexMappings = List.of( List.of("time-(.*)", "(.*)", "time-\\1-\\2")); indexTypeMappingRewriter = new TypeMappingsSanitizationTransformer(indexMappings, regexIndexMappings); @@ -44,7 +49,6 @@ public void testPutDoc() throws Exception { " }\n" + " }\n" + "}"; - var objMapper = new ObjectMapper(); var resultObj = indexTypeMappingRewriter.transformJson(objMapper.readValue(testString, LinkedHashMap.class)); var resultStr = objMapper.writeValueAsString(resultObj); log.atInfo().setMessage("resultStr = {}").setMessage(resultStr).log(); @@ -64,47 +68,79 @@ public void testPutDocRegex() throws Exception { " }\n" + " }\n" + "}"; - var objMapper = new ObjectMapper(); var resultObj = indexTypeMappingRewriter.transformJson(objMapper.readValue(testString, LinkedHashMap.class)); var resultStr = objMapper.writeValueAsString(resultObj); log.atInfo().setMessage("resultStr = {}").setMessage(resultStr).log(); } - @Test - public void testPutIndex() throws Exception { - var testString = + private static String makeMultiTypePutIndexRequest(String indexName) { + return "{\n" + + " \"" + JsonKeysForHttpMessage.METHOD_KEY + "\": \"PUT\",\n" + + " \"" + JsonKeysForHttpMessage.URI_KEY + "\": \"/" + indexName + "\",\n" + + " \"" + JsonKeysForHttpMessage.PAYLOAD_KEY + "\": {\n" + + " \"" + JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY + "\": " + "{\n" + - " \"" + JsonKeysForHttpMessage.METHOD_KEY + "\": \"PUT\",\n" + - " \"" + JsonKeysForHttpMessage.URI_KEY + "\": \"/indexA\",\n" + - " \"" + JsonKeysForHttpMessage.PAYLOAD_KEY + "\": {\n" + - " \"" + JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY + "\": " + - "{\n" + - " \"settings\" : {\n" + - " \"number_of_shards\" : 1\n" + - " }," + - " \"mappings\": {\n" + - " \"user\": {\n" + - " \"properties\": {\n" + - " \"name\": { \"type\": \"text\" },\n" + - " \"user_name\": { \"type\": \"keyword\" },\n" + - " \"email\": { \"type\": \"keyword\" }\n" + - " }\n" + - " },\n" + - " \"tweet\": {\n" + - " \"properties\": {\n" + - " \"content\": { \"type\": \"text\" },\n" + - " \"user_name\": { \"type\": \"keyword\" },\n" + - " \"tweeted_at\": { \"type\": \"date\" }\n" + - " }\n" + - " }\n" + - " }\n" + - "}" + - "\n" + - " }\n" + - "}"; - var objMapper = new ObjectMapper(); - var resultObj = indexTypeMappingRewriter.transformJson(objMapper.readValue(testString, LinkedHashMap.class)); - var resultStr = objMapper.writeValueAsString(resultObj); - log.atInfo().setMessage("resultStr = {}").setMessage(resultStr).log(); + " \"settings\" : {\n" + + " \"number_of_shards\" : 1\n" + + " }," + + " \"mappings\": {\n" + + " \"user\": {\n" + + " \"properties\": {\n" + + " \"name\": { \"type\": \"text\" },\n" + + " \"user_name\": { \"type\": \"keyword\" },\n" + + " \"email\": { \"type\": \"keyword\" }\n" + + " }\n" + + " },\n" + + " \"tweet\": {\n" + + " \"properties\": {\n" + + " \"content\": { \"type\": \"text\" },\n" + + " \"user_name\": { \"type\": \"keyword\" },\n" + + " \"tweeted_at\": { \"type\": \"date\" }\n" + + " }\n" + + " },\n" + + " \"following\": {\n" + + " \"properties\": {\n" + + " \"count\": { \"type\": \"integer\" },\n" + + " \"followers\": { \"type\": \"string\" }\n" + + " }\n" + + " }\n" + + " }\n" + + "}" + + "\n" + + " }\n" + + "}"; + } + + Map doPutIndex(String indexName) throws Exception { + var testString = makeMultiTypePutIndexRequest(indexName); + return indexTypeMappingRewriter.transformJson(objMapper.readValue(testString, LinkedHashMap.class)); + } + + @Test + public void testPutSingleTypeIndex() throws Exception { + final String index = "indexA"; + var result = doPutIndex(index); + Assertions.assertEquals(objMapper.readValue(makeMultiTypePutIndexRequest(index), LinkedHashMap.class), + result); + } + + @Test + public void testMultiTypeIndex() throws Exception { + final String index = "socialTypes"; + var result = doPutIndex(index); + var expected = objMapper.readTree(makeMultiTypePutIndexRequest(index)); + var mappings = ((ObjectNode) expected.path(JsonKeysForHttpMessage.PAYLOAD_KEY) + .path(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY) + .path("mappings")); + mappings.remove("following"); + var newProperties = new HashMap(); + newProperties.put("type", Map.of("type", "keyword")); + var user = mappings.remove("user"); + user.path("properties").fields().forEachRemaining(e -> newProperties.put(e.getKey(), e.getValue())); + var tweet = mappings.remove("tweet"); + tweet.path("properties").fields().forEachRemaining(e -> newProperties.put(e.getKey(), e.getValue())); + var properties = mappings.put("properties", objMapper.valueToTree(newProperties)); + ((ObjectNode)expected).put(JsonKeysForHttpMessage.URI_KEY, "/communal"); + Assertions.assertEquals(expected, objMapper.readTree(objMapper.writeValueAsString(result))); } }