From 2d47e48765eee719c6ae297c316f3ec63bc10676 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 25 Sep 2023 16:42:15 +0300 Subject: [PATCH 01/13] Show scanner tokens --- .../kompile/CompiledDefinition.java | 1 + .../parser/inner/ParseInModule.java | 21 +++++++++++++++++++ .../parser/inner/kernel/EarleyParser.java | 8 +++---- .../parser/inner/kernel/Scanner.java | 10 ++++----- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java index a2342afd40d..f6279770b7a 100644 --- a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java +++ b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java @@ -198,6 +198,7 @@ public Option ruleParsingModuleFor(String moduleName) { public K parseSingleTerm(Module module, Sort programStartSymbol, String startSymbolLocation, KExceptionManager kem, FileUtil files, String s, Source source) { try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) { + parseInModule.tokenizeString(s, programStartSymbol, startSymbolLocation, source); Tuple2, K>, Set> res = parseInModule.parseString(s, programStartSymbol, startSymbolLocation, source); kem.addAllKException(res._2().stream().map(e -> e.getKException()).collect(Collectors.toSet())); if (res._1().isLeft()) { diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 3aa8b4bf713..50a4789c2cf 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -7,6 +7,7 @@ import org.kframework.attributes.Source; import org.kframework.builtin.Sorts; import org.kframework.definition.Module; +import org.kframework.definition.TerminalLike; import org.kframework.kore.K; import org.kframework.kore.Sort; import org.kframework.main.GlobalOptions; @@ -29,9 +30,11 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; /** * A wrapper that takes a module and one can call the parser @@ -159,6 +162,24 @@ public void initialize() { } } + public Tuple2, K>, Set> + tokenizeString(String input, Sort startSymbol, String startSymbolLocation, Source source) { + try (Scanner scanner = getScanner()) { + EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); + Map kind2Token = + scanner.tokens.entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); + for (Scanner.Token word : mdata.words) { + System.out.format("%10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', + mdata.lines[word.startLoc], mdata.columns[word.startLoc], + mdata.lines[word.endLoc], mdata.columns[word.endLoc], + kind2Token.get(word.kind)); + } + System.exit(0); + return parseString(input, startSymbol, startSymbolLocation, scanner, source, 1, 1, true, false); + } + } + public Tuple2, K>, Set> parseString(String input, Sort startSymbol, String startSymbolLocation, Source source) { try (Scanner scanner = getScanner()) { diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index 7f48bda662e..418ef95d6c8 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -525,7 +525,7 @@ public void finish() { * Metadata about the state of the sentence being parsed. We collect this all in a single place in order to simplify * the type signatures of many methods. */ - private static class ParserMetadata { + public static class ParserMetadata { /** * @param input The sentence being parsed * @param scanner The scanner to use to tokenize the sentence @@ -583,11 +583,11 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin } // the list of tokens in the sentence - Scanner.Token[] words; + public Scanner.Token[] words; // an array containing the line of each character in the input sentence - int[] lines; + public int[] lines; // an array containing the column of each character in the input sentence - int[] columns; + public int[] columns; // a Source containing the file the sentence was parsed from Source source; // the original un-tokenized input sentence diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java index d59300a5b39..48f532f964c 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java @@ -50,7 +50,7 @@ */ public class Scanner implements AutoCloseable { - private final Map> tokens; + public final Map> tokens; private final File scanner; private final Module module; private GlobalOptions go = new GlobalOptions(); @@ -349,10 +349,10 @@ public int resolve(TerminalLike terminal) { } public static class Token { - final int kind; - final String value; - final int startLoc; - final int endLoc; + public final int kind; + public final String value; + public final int startLoc; + public final int endLoc; public Token(int kind, String value, long startLoc, long endLoc) { this.kind = kind; From 2552184311d8ffcc1fdba8b4baf1fd7b0b7605f0 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 25 Sep 2023 17:15:39 +0300 Subject: [PATCH 02/13] Add option to kast --- .../org/kframework/kast/KastFrontEnd.java | 80 ++++++++++--------- .../java/org/kframework/kast/KastOptions.java | 3 + .../kompile/CompiledDefinition.java | 7 +- .../java/org/kframework/parser/KRead.java | 4 + .../parser/inner/ParseInModule.java | 14 ++-- 5 files changed, 65 insertions(+), 43 deletions(-) diff --git a/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java b/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java index 9156ed7c766..c222faa9924 100644 --- a/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java +++ b/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java @@ -135,35 +135,39 @@ public int run() { Source source = options.source(); try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(mod, true, null)) { - Tuple2, K>, Set> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source); - kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet())); - if (res._1().isLeft()) { - throw res._1().left().get().iterator().next(); + if (options.debugTokens) + System.out.println(parseInModule.tokenizeString(stringToParse, source)); + else { + Tuple2, K>, Set> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source); + kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet())); + if (res._1().isLeft()) { + throw res._1().left().get().iterator().next(); + } + // important to get the extension module for unparsing because it contains generated syntax + // like casts, projections and others + Module unparsingMod = parseInModule.getExtensionModule(); + K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get()); + + if (options.expandMacros) { + parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); + } + ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod); + LabelInfo labelInfo = new LabelInfoFromModule(mod); + SortInfo sortInfo = SortInfo.fromModule(mod); + + Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty()); + if (options.steps.contains(KompileSteps.anonVars)) + r = (Rule) new ResolveAnonVar().resolve(r); + r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates + r = (Rule) new ConstantFolding().fold(unparsingMod, r); + r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r); + if (options.steps.contains(KompileSteps.concretizeCells)) { + r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r); + r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r); + } + K result = r.body(); + kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort); } - // important to get the extension module for unparsing because it contains generated syntax - // like casts, projections and others - Module unparsingMod = parseInModule.getExtensionModule(); - K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get()); - - if (options.expandMacros) { - parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); - } - ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod); - LabelInfo labelInfo = new LabelInfoFromModule(mod); - SortInfo sortInfo = SortInfo.fromModule(mod); - - Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty()); - if (options.steps.contains(KompileSteps.anonVars)) - r = (Rule) new ResolveAnonVar().resolve(r); - r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates - r = (Rule) new ConstantFolding().fold(unparsingMod, r); - r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r); - if (options.steps.contains(KompileSteps.concretizeCells)) { - r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r); - r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r); - } - K result = r.body(); - kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort); } sw.printTotal("Total"); @@ -210,17 +214,21 @@ public int run() { } else { Reader stringToParse = options.stringToParse(); Source source = options.source(); - K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse)); + if (options.debugTokens) + System.out.println(kread.showTokens(parsingMod, def, FileUtil.read(stringToParse), source)); + else { + K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse)); - if (options.expandMacros) { - parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); - } + if (options.expandMacros) { + parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); + } - if (sort.equals(Sorts.K())) { - sort = Sorts.KItem(); - } + if (sort.equals(Sorts.K())) { + sort = Sorts.KItem(); + } - kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort); + kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort); + } } sw.printTotal("Total"); diff --git a/kernel/src/main/java/org/kframework/kast/KastOptions.java b/kernel/src/main/java/org/kframework/kast/KastOptions.java index 2aaa084b516..644515106b2 100644 --- a/kernel/src/main/java/org/kframework/kast/KastOptions.java +++ b/kernel/src/main/java/org/kframework/kast/KastOptions.java @@ -155,4 +155,7 @@ public Class enumClass() { return InputModes.class; } } + + @Parameter(names="--debug-tokens", description="Print the list of tokens matched by the scanner. Useful for debugging parsing errors.") + public boolean debugTokens = false; } diff --git a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java index f6279770b7a..289619a9fa7 100644 --- a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java +++ b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java @@ -198,7 +198,6 @@ public Option ruleParsingModuleFor(String moduleName) { public K parseSingleTerm(Module module, Sort programStartSymbol, String startSymbolLocation, KExceptionManager kem, FileUtil files, String s, Source source) { try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) { - parseInModule.tokenizeString(s, programStartSymbol, startSymbolLocation, source); Tuple2, K>, Set> res = parseInModule.parseString(s, programStartSymbol, startSymbolLocation, source); kem.addAllKException(res._2().stream().map(e -> e.getKException()).collect(Collectors.toSet())); if (res._1().isLeft()) { @@ -208,6 +207,12 @@ public K parseSingleTerm(Module module, Sort programStartSymbol, String startSym } } + public String showTokens(Module module, FileUtil files, String s, Source source) { + try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) { + return parseInModule.tokenizeString(s, source); + } + } + public Module getExtensionModule(Module module, FileUtil files) { return RuleGrammarGenerator.getCombinedGrammar(module, true, files).getExtensionModule(); } diff --git a/kernel/src/main/java/org/kframework/parser/KRead.java b/kernel/src/main/java/org/kframework/parser/KRead.java index c5b50cf4afb..6ed83e89188 100644 --- a/kernel/src/main/java/org/kframework/parser/KRead.java +++ b/kernel/src/main/java/org/kframework/parser/KRead.java @@ -51,6 +51,10 @@ public KRead( this.globalOptions = globalOptions; } + public String showTokens(Module mod, CompiledDefinition def, String stringToParse, Source source) { + return def.showTokens(mod, files, stringToParse, source); + } + public K prettyRead(Module mod, Sort sort, String startSymbolLocation, CompiledDefinition def, Source source, String stringToParse) { return prettyRead(mod, sort, startSymbolLocation, def, source, stringToParse, this.input); } diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 50a4789c2cf..59ca0cebd5b 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -162,22 +162,24 @@ public void initialize() { } } - public Tuple2, K>, Set> - tokenizeString(String input, Sort startSymbol, String startSymbolLocation, Source source) { + /** + * Print the list of tokens matched by the scanner, the location and the Regex Terminal + */ + public String tokenizeString(String input, Source source) { + StringBuilder sb = new StringBuilder(" `Match` (location), Regex Terminal\n"); try (Scanner scanner = getScanner()) { EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); Map kind2Token = scanner.tokens.entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); for (Scanner.Token word : mdata.words) { - System.out.format("%10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', + sb.append(String.format("%10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', mdata.lines[word.startLoc], mdata.columns[word.startLoc], mdata.lines[word.endLoc], mdata.columns[word.endLoc], - kind2Token.get(word.kind)); + kind2Token.get(word.kind))); } - System.exit(0); - return parseString(input, startSymbol, startSymbolLocation, scanner, source, 1, 1, true, false); } + return sb.toString(); } public Tuple2, K>, Set> From 7a9b1ac77199f7dacbbaaa51be099fa8f04e9df9 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 25 Sep 2023 17:29:00 +0300 Subject: [PATCH 03/13] Add test --- .../regression-new/issue-3647-debugTokens/Makefile | 6 ++++++ .../issue-3647-debugTokens/a.test.kast | 1 + .../issue-3647-debugTokens/a.test.kast.out | 8 ++++++++ .../regression-new/issue-3647-debugTokens/test.k | 13 +++++++++++++ .../org/kframework/parser/inner/ParseInModule.java | 4 ++-- 5 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/test.k diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile b/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile new file mode 100644 index 00000000000..5ab56c86a70 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile @@ -0,0 +1,6 @@ +DEF=test +EXT=test +TESTDIR=. +KAST_FLAGS=--debug-tokens + +include ../../../include/kframework/ktest.mak diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast new file mode 100644 index 00000000000..c16357199a6 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast @@ -0,0 +1 @@ +a + 100 + 2 diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out new file mode 100644 index 00000000000..592def21f2c --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out @@ -0,0 +1,8 @@ +`Match` (location), Regex Terminal +`a` (1,1,1,2), r"[a-z][a-zA-Z0-9]*" +`+` (1,3,1,4), "+" +`100` (1,5,1,8), r"[\\+-]?[0-9]+" +`+` (1,9,1,10), "+" +`2` (1,11,1,12), r"[\\+-]?[0-9]+" +`` (2,1,2,1), null + diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k new file mode 100644 index 00000000000..bb79435dd85 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k @@ -0,0 +1,13 @@ +// Copyright (c) K Team. All Rights Reserved. + +module TEST-SYNTAX + imports INT-SYNTAX + imports ID-SYNTAX + syntax Exp ::= Exp "+" Exp [left] | Int | Id +endmodule + +module TEST + imports TEST-SYNTAX + configuration $PGM:Exp + +endmodule diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 59ca0cebd5b..3dfd6bbd494 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -166,14 +166,14 @@ public void initialize() { * Print the list of tokens matched by the scanner, the location and the Regex Terminal */ public String tokenizeString(String input, Source source) { - StringBuilder sb = new StringBuilder(" `Match` (location), Regex Terminal\n"); + StringBuilder sb = new StringBuilder("`Match` (location), Regex Terminal\n"); try (Scanner scanner = getScanner()) { EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); Map kind2Token = scanner.tokens.entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); for (Scanner.Token word : mdata.words) { - sb.append(String.format("%10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', + sb.append(String.format("%-10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', mdata.lines[word.startLoc], mdata.columns[word.startLoc], mdata.lines[word.endLoc], mdata.columns[word.endLoc], kind2Token.get(word.kind))); From fd5d0b16bbb130df8ab71cbebf0914f676af9888 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Tue, 26 Sep 2023 17:45:47 +0300 Subject: [PATCH 04/13] Code review --- .../parser/inner/ParseInModule.java | 23 +++++++------ .../parser/inner/kernel/EarleyParser.java | 32 +++++++++++-------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 3dfd6bbd494..3cbe2c78266 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -7,6 +7,7 @@ import org.kframework.attributes.Source; import org.kframework.builtin.Sorts; import org.kframework.definition.Module; +import org.kframework.definition.Terminal; import org.kframework.definition.TerminalLike; import org.kframework.kore.K; import org.kframework.kore.Sort; @@ -29,10 +30,7 @@ import java.io.IOException; import java.io.Serializable; import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.Map; -import java.util.Queue; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.stream.Collectors; @@ -166,17 +164,22 @@ public void initialize() { * Print the list of tokens matched by the scanner, the location and the Regex Terminal */ public String tokenizeString(String input, Source source) { - StringBuilder sb = new StringBuilder("`Match` (location), Regex Terminal\n"); + StringBuilder sb = new StringBuilder(); + sb.append("`Match` (location), Terminal\n"); + sb.append("------------------------------------\n"); try (Scanner scanner = getScanner()) { EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); Map kind2Token = scanner.tokens.entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); - for (Scanner.Token word : mdata.words) { - sb.append(String.format("%-10s (%d,%d,%d,%d), %s\n", '`' + word.value + '`', - mdata.lines[word.startLoc], mdata.columns[word.startLoc], - mdata.lines[word.endLoc], mdata.columns[word.endLoc], - kind2Token.get(word.kind))); + List lines = mdata.getLines(); + List columns = mdata.getColumns(); + for (Scanner.Token word : mdata.getWords()) { + String loc = String.format("(%d,%d,%d,%d),", + lines.get(word.startLoc), columns.get(word.startLoc), + lines.get(word.endLoc), columns.get(word.endLoc)); + sb.append(String.format("%-10s %-12s %s\n", '`' + word.value + '`', loc, + kind2Token.getOrDefault(word.kind, Terminal.apply("")))); } } return sb.toString(); diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index 418ef95d6c8..87b8fd638f5 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -24,18 +24,9 @@ import org.pcollections.PStack; import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; +import java.util.stream.IntStream; import static org.kframework.Collections.*; import static org.kframework.kore.KORE.*; @@ -582,12 +573,25 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin this.input = input; } + public List getWords() { + return List.of(words); + } + // the list of tokens in the sentence - public Scanner.Token[] words; + private final Scanner.Token[] words; + + public List getLines() { + return Arrays.stream(lines).boxed().collect(Collectors.toList()); + } + + public List getColumns() { + return Arrays.stream(columns).boxed().collect(Collectors.toList()); + } + // an array containing the line of each character in the input sentence - public int[] lines; + public final int[] lines; // an array containing the column of each character in the input sentence - public int[] columns; + private int[] columns; // a Source containing the file the sentence was parsed from Source source; // the original un-tokenized input sentence From 81a8dffae401e7d24d72b37b654ca077dbf72792 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Tue, 26 Sep 2023 17:49:02 +0300 Subject: [PATCH 05/13] More unmodifiable collections --- .../kframework/parser/inner/ParseInModule.java | 2 +- .../kframework/parser/inner/kernel/Scanner.java | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 3cbe2c78266..8e3855c0c22 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -170,7 +170,7 @@ public String tokenizeString(String input, Source source) { try (Scanner scanner = getScanner()) { EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); Map kind2Token = - scanner.tokens.entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) + scanner.getTokens().entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); List lines = mdata.getLines(); List columns = mdata.getColumns(); diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java index 48f532f964c..b71d0c44c16 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java @@ -30,14 +30,7 @@ import java.nio.ByteOrder; import java.nio.file.StandardCopyOption; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; import java.util.concurrent.Semaphore; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -50,13 +43,17 @@ */ public class Scanner implements AutoCloseable { - public final Map> tokens; + private final Map> tokens; private final File scanner; private final Module module; private GlobalOptions go = new GlobalOptions(); public static final String COMPILER = OS.current().equals(OS.OSX) ? "clang" : "gcc"; + public Map> getTokens() { + return Collections.unmodifiableMap(tokens); + } + public static Map> getTokens(Module module) { Map tokens = new TreeMap<>(); Set terminals = new HashSet<>(); From c473a29546a3749200d66afc9381d886f909aabf Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Tue, 26 Sep 2023 17:50:27 +0300 Subject: [PATCH 06/13] Update test output --- .../issue-3647-debugTokens/a.test.kast.out | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out index 592def21f2c..87ee39f750d 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out @@ -1,8 +1,9 @@ -`Match` (location), Regex Terminal -`a` (1,1,1,2), r"[a-z][a-zA-Z0-9]*" -`+` (1,3,1,4), "+" -`100` (1,5,1,8), r"[\\+-]?[0-9]+" -`+` (1,9,1,10), "+" +`Match` (location), Terminal +------------------------------------ +`a` (1,1,1,2), r"[a-z][a-zA-Z0-9]*" +`+` (1,3,1,4), "+" +`100` (1,5,1,8), r"[\\+-]?[0-9]+" +`+` (1,9,1,10), "+" `2` (1,11,1,12), r"[\\+-]?[0-9]+" -`` (2,1,2,1), null +`` (2,1,2,1), "" From c1d257d6865087a1d4eae5528a3e78dce1e33d9e Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Tue, 26 Sep 2023 19:18:53 +0300 Subject: [PATCH 07/13] Refactor code remove some stray publics --- .../parser/inner/kernel/EarleyParser.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index 87b8fd638f5..debfeec01d4 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -573,13 +573,21 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin this.input = input; } + // the list of tokens in the sentence + final Scanner.Token[] words; + // an array containing the line of each character in the input sentence + final int[] lines; + // an array containing the column of each character in the input sentence + final int[] columns; + // a Source containing the file the sentence was parsed from + Source source; + // the original un-tokenized input sentence + String input; + public List getWords() { return List.of(words); } - // the list of tokens in the sentence - private final Scanner.Token[] words; - public List getLines() { return Arrays.stream(lines).boxed().collect(Collectors.toList()); } @@ -587,15 +595,6 @@ public List getLines() { public List getColumns() { return Arrays.stream(columns).boxed().collect(Collectors.toList()); } - - // an array containing the line of each character in the input sentence - public final int[] lines; - // an array containing the column of each character in the input sentence - private int[] columns; - // a Source containing the file the sentence was parsed from - Source source; - // the original un-tokenized input sentence - String input; } /** From 8b2b24e0ca7106d9cc1acde66cf1e296983acb17 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Tue, 26 Sep 2023 19:21:33 +0300 Subject: [PATCH 08/13] Remove imports --- .../java/org/kframework/parser/inner/kernel/EarleyParser.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index debfeec01d4..f87dfe4ead6 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -23,10 +23,8 @@ import org.pcollections.ConsPStack; import org.pcollections.PStack; -import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.IntStream; import static org.kframework.Collections.*; import static org.kframework.kore.KORE.*; From 31ceb50a4686144a4f3b6427b6f3b1c32b1e2fa5 Mon Sep 17 00:00:00 2001 From: devops Date: Tue, 26 Sep 2023 16:40:54 +0000 Subject: [PATCH 09/13] Update Nix lock files --- nix/mavenix.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/nix/mavenix.lock b/nix/mavenix.lock index 8e22af9e1f4..55edc276b6a 100644 --- a/nix/mavenix.lock +++ b/nix/mavenix.lock @@ -2678,12 +2678,12 @@ "sha1": "df7f15de037a1ee4d57d2ed779739089f560338c" }, { - "path": "net/java/dev/jna/jna/4.1.0/jna-4.1.0.jar", - "sha1": "1c12d070e602efd8021891cdd7fd18bc129372d4" + "path": "net/java/dev/jna/jna/5.13.0/jna-5.13.0.jar", + "sha1": "1200e7ebeedbe0d10062093f32925a912020e747" }, { - "path": "net/java/dev/jna/jna/4.1.0/jna-4.1.0.pom", - "sha1": "05e315ccb7e487e68bf19124d736afbc9136c631" + "path": "net/java/dev/jna/jna/5.13.0/jna-5.13.0.pom", + "sha1": "b7cc05a5394544befc936c39080a93cc8c1e082e" }, { "path": "net/java/jvnet-parent/1/jvnet-parent-1.pom", @@ -7330,20 +7330,20 @@ "sha1": "e8848369738c03e40af5507686216f9b8b44b6a3" }, { - "path": "org/kframework/dependencies/nailgun-all/1.0.0-SNAPSHOT/nailgun-all-1.0.0-20230817.181106-1.pom", + "path": "org/kframework/dependencies/nailgun-all/1.0.0-SNAPSHOT/nailgun-all-1.0.0-20230818.165756-2.pom", "sha1": "81f6a397361513f34b26d5f43d329df888f2feaa" }, { - "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230817.181113-1.jar", - "sha1": "04e2c42db23399a624ac3c4dce874a66749379c7" + "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230818.165803-2.jar", + "sha1": "5ee76d15654f02903cc47737f2436c26e8890950" }, { - "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230817.181113-1.jar", - "sha1": "04e2c42db23399a624ac3c4dce874a66749379c7" + "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230818.165803-2.jar", + "sha1": "5ee76d15654f02903cc47737f2436c26e8890950" }, { - "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230817.181113-1.pom", - "sha1": "3c8bc01cd1f032dd7d78f2e3d6198e4d51502693" + "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT/nailgun-server-1.0.0-20230818.165803-2.pom", + "sha1": "0b6e24b04b9c45f192ec3e23e5fdbd2627049def" }, { "path": "org/kframework/dependencies/ng/0.9.2-k4.0/ng-0.9.2-k4.0-linux.uexe", @@ -8209,11 +8209,11 @@ "path": "com/google/code/gson/gson" }, { - "content": "\n\n org.kframework.dependencies\n nailgun-all\n 1.0.0-SNAPSHOT\n \n \n 20230817.181106\n 1\n \n 20230817181106\n \n \n pom\n 1.0.0-20230817.181106-1\n 20230817181106\n \n \n \n", + "content": "\n\n org.kframework.dependencies\n nailgun-all\n 1.0.0-SNAPSHOT\n \n \n 20230818.165756\n 2\n \n 20230818165756\n \n \n pom\n 1.0.0-20230818.165756-2\n 20230818165756\n \n \n \n", "path": "org/kframework/dependencies/nailgun-all/1.0.0-SNAPSHOT" }, { - "content": "\n\n org.kframework.dependencies\n nailgun-server\n 1.0.0-SNAPSHOT\n \n \n 20230817.181113\n 1\n \n 20230817181113\n \n \n jar\n 1.0.0-20230817.181113-1\n 20230817181113\n \n \n pom\n 1.0.0-20230817.181113-1\n 20230817181113\n \n \n sources\n jar\n 1.0.0-20230817.181113-1\n 20230817181113\n \n \n javadoc\n jar\n 1.0.0-20230817.181113-1\n 20230817181113\n \n \n \n", + "content": "\n\n org.kframework.dependencies\n nailgun-server\n 1.0.0-SNAPSHOT\n \n \n 20230818.165803\n 2\n \n 20230818165803\n \n \n jar\n 1.0.0-20230818.165803-2\n 20230818165803\n \n \n pom\n 1.0.0-20230818.165803-2\n 20230818165803\n \n \n sources\n jar\n 1.0.0-20230818.165803-2\n 20230818165803\n \n \n javadoc\n jar\n 1.0.0-20230818.165803-2\n 20230818165803\n \n \n \n", "path": "org/kframework/dependencies/nailgun-server/1.0.0-SNAPSHOT" }, { From 1601fad223a0ba1d9a4ca3ab3fe5a05feb3a3150 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Wed, 27 Sep 2023 18:14:35 +0300 Subject: [PATCH 10/13] Fix output --- .../issue-3647-debugTokens/a.test.kast | 15 +++++++++++++- .../issue-3647-debugTokens/a.test.kast.out | 20 ++++++++++++------- .../issue-3647-debugTokens/test.k | 3 ++- .../parser/inner/ParseInModule.java | 15 +++++++++++--- .../parser/inner/kernel/EarleyParser.java | 5 +++-- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast index c16357199a6..90f692d623b 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast @@ -1 +1,14 @@ -a + 100 + 2 +1 + 2 + aaaaaaaaaaaa + + + + + + + + + + ++ 10000000 ++ "str" ++ "long str that breaks alighnment" diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out index 87ee39f750d..3fe089c16fe 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out @@ -1,9 +1,15 @@ -`Match` (location), Terminal +(location) "Match" Terminal ------------------------------------ -`a` (1,1,1,2), r"[a-z][a-zA-Z0-9]*" -`+` (1,3,1,4), "+" -`100` (1,5,1,8), r"[\\+-]?[0-9]+" -`+` (1,9,1,10), "+" -`2` (1,11,1,12), r"[\\+-]?[0-9]+" -`` (2,1,2,1), "" +(1,1,1,2), "1" r"[\\+-]?[0-9]+" +(1,3,1,4), "+" "+" +(1,5,1,6), "2" r"[\\+-]?[0-9]+" +(1,7,1,8), "+" "+" +(1,9,1,21), "aaaaaaaaaaaa" r"[a-z][a-zA-Z0-9]*" +(12,1,12,2), "+" "+" +(12,3,12,11), "10000000" r"[\\+-]?[0-9]+" +(13,1,13,2), "+" "+" +(13,3,13,8), "\"str\"" r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]" +(14,1,14,2), "+" "+" +(14,3,14,36), "\"long str that breaks alighnment\"" r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]" +(15,1,15,1), "" "" diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k index bb79435dd85..90b98cf29a9 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k @@ -3,7 +3,8 @@ module TEST-SYNTAX imports INT-SYNTAX imports ID-SYNTAX - syntax Exp ::= Exp "+" Exp [left] | Int | Id + imports STRING-SYNTAX + syntax Exp ::= Exp "+" Exp [left] | Int | Id | String endmodule module TEST diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 8e3855c0c22..dc32e673cea 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -18,6 +18,7 @@ import org.kframework.parser.inner.kernel.EarleyParser; import org.kframework.parser.inner.kernel.Scanner; import org.kframework.parser.outer.Outer; +import org.kframework.utils.StringUtil; import org.kframework.utils.errorsystem.KEMException; import org.kframework.utils.file.FileUtil; import scala.Tuple2; @@ -165,8 +166,6 @@ public void initialize() { */ public String tokenizeString(String input, Source source) { StringBuilder sb = new StringBuilder(); - sb.append("`Match` (location), Terminal\n"); - sb.append("------------------------------------\n"); try (Scanner scanner = getScanner()) { EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); Map kind2Token = @@ -174,11 +173,21 @@ public String tokenizeString(String input, Source source) { .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); List lines = mdata.getLines(); List columns = mdata.getColumns(); + int maxLocLen = 0; + List locs = new ArrayList<>(); + List words = mdata.getWords(); for (Scanner.Token word : mdata.getWords()) { String loc = String.format("(%d,%d,%d,%d),", lines.get(word.startLoc), columns.get(word.startLoc), lines.get(word.endLoc), columns.get(word.endLoc)); - sb.append(String.format("%-10s %-12s %s\n", '`' + word.value + '`', loc, + locs.add(loc); + maxLocLen = Math.max(maxLocLen, loc.length()); + } + sb.append(String.format("%-" + maxLocLen + "s \"Match\" Terminal\n", "(location)")); + sb.append("------------------------------------\n"); + for (int i = 0; i < words.size(); i++) { + Scanner.Token word = words.get(i); + sb.append(String.format("%-" + maxLocLen + "s %-12s %s\n", locs.get(i), StringUtil.enquoteKString(word.value), kind2Token.getOrDefault(word.kind, Terminal.apply("")))); } } diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index f87dfe4ead6..035f81b1245 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -1,6 +1,7 @@ // Copyright (c) K Team. All Rights Reserved. package org.kframework.parser.inner.kernel; +import com.google.common.primitives.Ints; import org.apache.commons.codec.binary.StringUtils; import org.jetbrains.annotations.NotNull; import org.kframework.attributes.Att; @@ -587,11 +588,11 @@ public List getWords() { } public List getLines() { - return Arrays.stream(lines).boxed().collect(Collectors.toList()); + return Ints.asList(lines); } public List getColumns() { - return Arrays.stream(columns).boxed().collect(Collectors.toList()); + return Ints.asList(columns); } } From b87588e6f51612a85fd6a06a93793b20c88fd708 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 2 Oct 2023 16:51:18 +0300 Subject: [PATCH 11/13] Unmodifiable list --- .../java/org/kframework/parser/inner/kernel/EarleyParser.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index 035f81b1245..8f3f541295e 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -588,11 +588,11 @@ public List getWords() { } public List getLines() { - return Ints.asList(lines); + return Collections.unmodifiableList(Ints.asList(lines)); } public List getColumns() { - return Ints.asList(columns); + return Collections.unmodifiableList(Ints.asList(columns)); } } From c27506795722518d4ec98d41428e3537cdf378e8 Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 2 Oct 2023 17:53:11 +0300 Subject: [PATCH 12/13] Output Markdown table and dynamic column widths. --- .../issue-3647-debugTokens/a.test.kast | 2 +- .../issue-3647-debugTokens/a.test.kast.out | 28 +++++++++---------- .../issue-3647-debugTokens/b.test.kast | 1 + .../issue-3647-debugTokens/b.test.kast.out | 9 ++++++ .../parser/inner/ParseInModule.java | 25 +++++++++++++---- 5 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast create mode 100644 k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast index 90f692d623b..0d8c763edc3 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast @@ -11,4 +11,4 @@ + 10000000 + "str" -+ "long str that breaks alighnment" ++ "long str that breaks alighnment" diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out index 3fe089c16fe..903a7ba1fd5 100644 --- a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out @@ -1,15 +1,15 @@ -(location) "Match" Terminal ------------------------------------- -(1,1,1,2), "1" r"[\\+-]?[0-9]+" -(1,3,1,4), "+" "+" -(1,5,1,6), "2" r"[\\+-]?[0-9]+" -(1,7,1,8), "+" "+" -(1,9,1,21), "aaaaaaaaaaaa" r"[a-z][a-zA-Z0-9]*" -(12,1,12,2), "+" "+" -(12,3,12,11), "10000000" r"[\\+-]?[0-9]+" -(13,1,13,2), "+" "+" -(13,3,13,8), "\"str\"" r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]" -(14,1,14,2), "+" "+" -(14,3,14,36), "\"long str that breaks alighnment\"" r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]" -(15,1,15,1), "" "" +|"Match" | (location) | Terminal | +|---------------------------------------------------------------------------------|---------------|---------------------| +|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" | +|"+" | (1,3,1,4) | "+" | +|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" | +|"+" | (1,7,1,8) | "+" | +|"aaaaaaaaaaaa" | (1,9,1,21) | r"[a-z][a-zA-Z0-9]*"| +|"+" | (12,1,12,2) | "+" | +|"10000000" | (12,3,12,11) | r"[\\+-]?[0-9]+" | +|"+" | (13,1,13,2) | "+" | +|"\"str\"" | (13,3,13,8) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"| +|"+" | (14,1,14,2) | "+" | +|"\"long str that breaks alighnment\"" | (14,3,14,103) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"| +|"" | (15,1,15,1) | "" | diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast new file mode 100644 index 00000000000..f845bb7df41 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast @@ -0,0 +1 @@ +1 + 2 + aaa diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out new file mode 100644 index 00000000000..1525daf1bc8 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out @@ -0,0 +1,9 @@ +|"Match" | (location) | Terminal | +|--------|------------|---------------------| +|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" | +|"+" | (1,3,1,4) | "+" | +|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" | +|"+" | (1,7,1,8) | "+" | +|"aaa" | (1,9,1,12) | r"[a-z][a-zA-Z0-9]*"| +|"" | (2,1,2,1) | "" | + diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index dc32e673cea..4e3cbdb4066 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -163,6 +163,7 @@ public void initialize() { /** * Print the list of tokens matched by the scanner, the location and the Regex Terminal + * The output is a valid Markdown table. */ public String tokenizeString(String input, Source source) { StringBuilder sb = new StringBuilder(); @@ -173,22 +174,34 @@ public String tokenizeString(String input, Source source) { .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); List lines = mdata.getLines(); List columns = mdata.getColumns(); - int maxLocLen = 0; + int maxTokenLen = 7, maxLocLen = 10, maxTerminalLen = 10; List locs = new ArrayList<>(); + List tokens = new ArrayList<>(); + List terminals = new ArrayList<>(); List words = mdata.getWords(); for (Scanner.Token word : mdata.getWords()) { - String loc = String.format("(%d,%d,%d,%d),", + String loc = String.format("(%d,%d,%d,%d)", lines.get(word.startLoc), columns.get(word.startLoc), lines.get(word.endLoc), columns.get(word.endLoc)); locs.add(loc); maxLocLen = Math.max(maxLocLen, loc.length()); + String tok = StringUtil.enquoteKString(word.value); + tokens.add(tok); + maxTokenLen = Math.max(maxTokenLen, tok.length()); + String terminal = kind2Token.getOrDefault(word.kind, Terminal.apply("")).toString(); + terminals.add(terminal); + maxTerminalLen = Math.max(maxTerminalLen, terminal.length()); } - sb.append(String.format("%-" + maxLocLen + "s \"Match\" Terminal\n", "(location)")); - sb.append("------------------------------------\n"); + // if the token is absurdly large limit the column to 80 chars to maintain alignment + maxTokenLen = Math.min(maxTokenLen, 80); + maxTerminalLen = Math.min(maxTerminalLen, 20); + sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n", + "\"Match\"", "(location)", "Terminal")); + sb.append(String.format("|-%s|--%s|-%s|\n", "-".repeat(maxTokenLen), "-".repeat(maxLocLen), "-".repeat(maxTerminalLen))); for (int i = 0; i < words.size(); i++) { Scanner.Token word = words.get(i); - sb.append(String.format("%-" + maxLocLen + "s %-12s %s\n", locs.get(i), StringUtil.enquoteKString(word.value), - kind2Token.getOrDefault(word.kind, Terminal.apply("")))); + sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n", + tokens.get(i), locs.get(i), terminals.get(i))); } } return sb.toString(); From 237eb52cbc0391266a6f382135137a516d369e7c Mon Sep 17 00:00:00 2001 From: Radu Mereuta Date: Mon, 2 Oct 2023 18:06:48 +0300 Subject: [PATCH 13/13] Update cmd option description --- kernel/src/main/java/org/kframework/kast/KastOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/main/java/org/kframework/kast/KastOptions.java b/kernel/src/main/java/org/kframework/kast/KastOptions.java index 644515106b2..add6323ceb2 100644 --- a/kernel/src/main/java/org/kframework/kast/KastOptions.java +++ b/kernel/src/main/java/org/kframework/kast/KastOptions.java @@ -156,6 +156,6 @@ public Class enumClass() { } } - @Parameter(names="--debug-tokens", description="Print the list of tokens matched by the scanner. Useful for debugging parsing errors.") + @Parameter(names="--debug-tokens", description="Print a Markdown table of tokens matched by the scanner. Useful for debugging parsing errors.") public boolean debugTokens = false; }