From a6d7d86894defa561614093f689b7ed05cb924f8 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:49:41 -0400 Subject: [PATCH] Refactor Grok validate pattern to iterative approach (#14206) (#14693) * grok validate patterns recusrion to iterative * Add max depth in resolving a pattern to avoid OOM * change path from deque to arraylist * rename queue to stack * Change max depth to 500 * typo originPatternName fix * spotless --------- (cherry picked from commit 13f04d97e3438b0389d1e851dfd08678ab644361) Signed-off-by: Sandesh Kumar Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- CHANGELOG.md | 1 + .../main/java/org/opensearch/grok/Grok.java | 122 +++++++++++++----- .../java/org/opensearch/grok/GrokTests.java | 10 ++ 3 files changed, 99 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ed5df09920e5..ad9585fc2f0a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix file cache initialization ([#14004](https://github.com/opensearch-project/OpenSearch/pull/14004)) - Handle NPE in GetResult if "found" field is missing ([#14552](https://github.com/opensearch-project/OpenSearch/pull/14552)) - Refactoring FilterPath.parse by using an iterative approach ([#14200](https://github.com/opensearch-project/OpenSearch/pull/14200)) +- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206)) ### Security diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index 7aa3347ba4f4b..aa5b1a936b99d 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -37,14 +37,18 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Stack; +import java.util.Set; import java.util.function.Consumer; import org.jcodings.specific.UTF8Encoding; @@ -86,6 +90,7 @@ public final class Grok { UTF8Encoding.INSTANCE, Syntax.DEFAULT ); + private static final int MAX_PATTERN_DEPTH_SIZE = 500; private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit @@ -128,7 +133,7 @@ private Grok( expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE, - message -> logCallBack.accept(message) + logCallBack::accept ); List captureConfig = new ArrayList<>(); @@ -144,7 +149,7 @@ private Grok( */ private void validatePatternBank() { for (String patternName : patternBank.keySet()) { - validatePatternBank(patternName, new Stack<>()); + validatePatternBank(patternName); } } @@ -156,33 +161,84 @@ private void validatePatternBank() { * a reference to another named pattern. This method will navigate to all these named patterns and * check for a circular reference. */ - private void validatePatternBank(String patternName, Stack path) { - String pattern = patternBank.get(patternName); - boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":"); - if (isSelfReference) { - throwExceptionForCircularReference(patternName, pattern); - } else if (path.contains(patternName)) { - // current pattern name is already in the path, fetch its predecessor - String prevPatternName = path.pop(); - String prevPattern = patternBank.get(prevPatternName); - throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path); - } - path.push(patternName); - for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) { - int begin = i + 2; - int syntaxEndIndex = pattern.indexOf('}', begin); - if (syntaxEndIndex == -1) { - throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]"); + private void validatePatternBank(String initialPatternName) { + Deque stack = new ArrayDeque<>(); + Set visitedPatterns = new HashSet<>(); + Map> pathMap = new HashMap<>(); + + List initialPath = new ArrayList<>(); + initialPath.add(initialPatternName); + pathMap.put(initialPatternName, initialPath); + stack.push(new Frame(initialPatternName, initialPath, 0)); + + while (!stack.isEmpty()) { + Frame frame = stack.peek(); + String patternName = frame.patternName; + List path = frame.path; + int startIndex = frame.startIndex; + String pattern = patternBank.get(patternName); + + if (visitedPatterns.contains(patternName)) { + stack.pop(); + continue; + } + + visitedPatterns.add(patternName); + boolean foundDependency = false; + + for (int i = startIndex; i < pattern.length(); i++) { + if (pattern.startsWith("%{", i)) { + int begin = i + 2; + int syntaxEndIndex = pattern.indexOf('}', begin); + if (syntaxEndIndex == -1) { + throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]"); + } + + int semanticNameIndex = pattern.indexOf(':', begin); + int end = semanticNameIndex == -1 ? syntaxEndIndex : Math.min(syntaxEndIndex, semanticNameIndex); + + String dependsOnPattern = pattern.substring(begin, end); + + if (dependsOnPattern.equals(patternName)) { + throwExceptionForCircularReference(patternName, pattern); + } + + if (pathMap.containsKey(dependsOnPattern)) { + throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path.subList(0, path.size() - 1)); + } + + List newPath = new ArrayList<>(path); + newPath.add(dependsOnPattern); + pathMap.put(dependsOnPattern, newPath); + + stack.push(new Frame(dependsOnPattern, newPath, 0)); + frame.startIndex = i + 1; + foundDependency = true; + break; + } } - int semanticNameIndex = pattern.indexOf(':', begin); - int end = syntaxEndIndex; - if (semanticNameIndex != -1) { - end = Math.min(syntaxEndIndex, semanticNameIndex); + + if (!foundDependency) { + pathMap.remove(patternName); + stack.pop(); + } + + if (stack.size() > MAX_PATTERN_DEPTH_SIZE) { + throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE); } - String dependsOnPattern = pattern.substring(begin, end); - validatePatternBank(dependsOnPattern, path); } - path.pop(); + } + + private static class Frame { + String patternName; + List path; + int startIndex; + + Frame(String patternName, List path, int startIndex) { + this.patternName = patternName; + this.path = path; + this.startIndex = startIndex; + } } private static void throwExceptionForCircularReference(String patternName, String pattern) { @@ -192,13 +248,13 @@ private static void throwExceptionForCircularReference(String patternName, Strin private static void throwExceptionForCircularReference( String patternName, String pattern, - String originPatterName, - Stack path + String originPatternName, + List path ) { StringBuilder message = new StringBuilder("circular reference in pattern ["); message.append(patternName).append("][").append(pattern).append("]"); - if (originPatterName != null) { - message.append(" back to pattern [").append(originPatterName).append("]"); + if (originPatternName != null) { + message.append(" back to pattern [").append(originPatternName).append("]"); } if (path != null && path.size() > 1) { message.append(" via patterns [").append(String.join("=>", path)).append("]"); @@ -217,9 +273,7 @@ private String groupMatch(String name, Region region, String pattern) { int begin = region.getBeg(number); int end = region.getEnd(number); return new String(pattern.getBytes(StandardCharsets.UTF_8), begin, end - begin, StandardCharsets.UTF_8); - } catch (StringIndexOutOfBoundsException e) { - return null; - } catch (ValueException e) { + } catch (StringIndexOutOfBoundsException | ValueException e) { return null; } } diff --git a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java index a37689e051c67..8476d541aa46e 100644 --- a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java @@ -377,6 +377,16 @@ public void testCircularReference() { "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME1=>NAME2=>NAME3=>NAME4]", e.getMessage() ); + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new TreeMap<>(); + for (int i = 1; i <= 501; i++) { + bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!"); + } + String pattern = "%{NAME1}"; + new Grok(bank, pattern, false, logger::warn); + }); + assertEquals("Pattern references exceeded maximum depth of 500", e.getMessage()); } public void testMalformedPattern() {