diff --git a/k-distribution/src/main/scripts/lib/k b/k-distribution/src/main/scripts/lib/k index 34f4bf3b639..1d510a2ff60 100755 --- a/k-distribution/src/main/scripts/lib/k +++ b/k-distribution/src/main/scripts/lib/k @@ -22,7 +22,7 @@ for flag in "$@"; do fi if [[ -z "${version}" ]]; then - version="v"$(cat "${K_BASE}"/version) + version="v"$(cat "${K_BASE}"/kframework/version) fi echo "K version: ${version}" diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile b/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile new file mode 100644 index 00000000000..5ab56c86a70 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/Makefile @@ -0,0 +1,6 @@ +DEF=test +EXT=test +TESTDIR=. +KAST_FLAGS=--debug-tokens + +include ../../../include/kframework/ktest.mak diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast new file mode 100644 index 00000000000..0d8c763edc3 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast @@ -0,0 +1,14 @@ +1 + 2 + aaaaaaaaaaaa + + + + + + + + + + ++ 10000000 ++ "str" ++ "long str that breaks alighnment" diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out new file mode 100644 index 00000000000..903a7ba1fd5 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/a.test.kast.out @@ -0,0 +1,15 @@ +|"Match" | (location) | Terminal | +|---------------------------------------------------------------------------------|---------------|---------------------| +|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" | +|"+" | (1,3,1,4) | "+" | +|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" | +|"+" | (1,7,1,8) | "+" | +|"aaaaaaaaaaaa" | (1,9,1,21) | r"[a-z][a-zA-Z0-9]*"| +|"+" | (12,1,12,2) | "+" | +|"10000000" | (12,3,12,11) | r"[\\+-]?[0-9]+" | +|"+" | (13,1,13,2) | "+" | +|"\"str\"" | (13,3,13,8) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"| +|"+" | (14,1,14,2) | "+" | +|"\"long str that breaks alighnment\"" | (14,3,14,103) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"| +|"" | (15,1,15,1) | "" | + diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast new file mode 100644 index 00000000000..f845bb7df41 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast @@ -0,0 +1 @@ +1 + 2 + aaa diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out new file mode 100644 index 00000000000..1525daf1bc8 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/b.test.kast.out @@ -0,0 +1,9 @@ +|"Match" | (location) | Terminal | +|--------|------------|---------------------| +|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" | +|"+" | (1,3,1,4) | "+" | +|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" | +|"+" | (1,7,1,8) | "+" | +|"aaa" | (1,9,1,12) | r"[a-z][a-zA-Z0-9]*"| +|"" | (2,1,2,1) | "" | + diff --git a/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k new file mode 100644 index 00000000000..90b98cf29a9 --- /dev/null +++ b/k-distribution/tests/regression-new/issue-3647-debugTokens/test.k @@ -0,0 +1,14 @@ +// Copyright (c) K Team. All Rights Reserved. + +module TEST-SYNTAX + imports INT-SYNTAX + imports ID-SYNTAX + imports STRING-SYNTAX + syntax Exp ::= Exp "+" Exp [left] | Int | Id | String +endmodule + +module TEST + imports TEST-SYNTAX + configuration $PGM:Exp + +endmodule diff --git a/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java b/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java index 9156ed7c766..c222faa9924 100644 --- a/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java +++ b/kernel/src/main/java/org/kframework/kast/KastFrontEnd.java @@ -135,35 +135,39 @@ public int run() { Source source = options.source(); try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(mod, true, null)) { - Tuple2, K>, Set> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source); - kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet())); - if (res._1().isLeft()) { - throw res._1().left().get().iterator().next(); + if (options.debugTokens) + System.out.println(parseInModule.tokenizeString(stringToParse, source)); + else { + Tuple2, K>, Set> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source); + kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet())); + if (res._1().isLeft()) { + throw res._1().left().get().iterator().next(); + } + // important to get the extension module for unparsing because it contains generated syntax + // like casts, projections and others + Module unparsingMod = parseInModule.getExtensionModule(); + K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get()); + + if (options.expandMacros) { + parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); + } + ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod); + LabelInfo labelInfo = new LabelInfoFromModule(mod); + SortInfo sortInfo = SortInfo.fromModule(mod); + + Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty()); + if (options.steps.contains(KompileSteps.anonVars)) + r = (Rule) new ResolveAnonVar().resolve(r); + r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates + r = (Rule) new ConstantFolding().fold(unparsingMod, r); + r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r); + if (options.steps.contains(KompileSteps.concretizeCells)) { + r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r); + r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r); + } + K result = r.body(); + kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort); } - // important to get the extension module for unparsing because it contains generated syntax - // like casts, projections and others - Module unparsingMod = parseInModule.getExtensionModule(); - K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get()); - - if (options.expandMacros) { - parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); - } - ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod); - LabelInfo labelInfo = new LabelInfoFromModule(mod); - SortInfo sortInfo = SortInfo.fromModule(mod); - - Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty()); - if (options.steps.contains(KompileSteps.anonVars)) - r = (Rule) new ResolveAnonVar().resolve(r); - r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates - r = (Rule) new ConstantFolding().fold(unparsingMod, r); - r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r); - if (options.steps.contains(KompileSteps.concretizeCells)) { - r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r); - r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r); - } - K result = r.body(); - kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort); } sw.printTotal("Total"); @@ -210,17 +214,21 @@ public int run() { } else { Reader stringToParse = options.stringToParse(); Source source = options.source(); - K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse)); + if (options.debugTokens) + System.out.println(kread.showTokens(parsingMod, def, FileUtil.read(stringToParse), source)); + else { + K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse)); - if (options.expandMacros) { - parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); - } + if (options.expandMacros) { + parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed); + } - if (sort.equals(Sorts.K())) { - sort = Sorts.KItem(); - } + if (sort.equals(Sorts.K())) { + sort = Sorts.KItem(); + } - kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort); + kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort); + } } sw.printTotal("Total"); diff --git a/kernel/src/main/java/org/kframework/kast/KastOptions.java b/kernel/src/main/java/org/kframework/kast/KastOptions.java index 86402c537c6..7ef74e430c6 100644 --- a/kernel/src/main/java/org/kframework/kast/KastOptions.java +++ b/kernel/src/main/java/org/kframework/kast/KastOptions.java @@ -158,4 +158,7 @@ public Class enumClass() { return InputModes.class; } } + + @Parameter(names="--debug-tokens", description="Print a Markdown table of tokens matched by the scanner. Useful for debugging parsing errors.") + public boolean debugTokens = false; } diff --git a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java index a2342afd40d..289619a9fa7 100644 --- a/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java +++ b/kernel/src/main/java/org/kframework/kompile/CompiledDefinition.java @@ -207,6 +207,12 @@ public K parseSingleTerm(Module module, Sort programStartSymbol, String startSym } } + public String showTokens(Module module, FileUtil files, String s, Source source) { + try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) { + return parseInModule.tokenizeString(s, source); + } + } + public Module getExtensionModule(Module module, FileUtil files) { return RuleGrammarGenerator.getCombinedGrammar(module, true, files).getExtensionModule(); } diff --git a/kernel/src/main/java/org/kframework/parser/KRead.java b/kernel/src/main/java/org/kframework/parser/KRead.java index c5b50cf4afb..6ed83e89188 100644 --- a/kernel/src/main/java/org/kframework/parser/KRead.java +++ b/kernel/src/main/java/org/kframework/parser/KRead.java @@ -51,6 +51,10 @@ public KRead( this.globalOptions = globalOptions; } + public String showTokens(Module mod, CompiledDefinition def, String stringToParse, Source source) { + return def.showTokens(mod, files, stringToParse, source); + } + public K prettyRead(Module mod, Sort sort, String startSymbolLocation, CompiledDefinition def, Source source, String stringToParse) { return prettyRead(mod, sort, startSymbolLocation, def, source, stringToParse, this.input); } diff --git a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java index 3aa8b4bf713..4e3cbdb4066 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java +++ b/kernel/src/main/java/org/kframework/parser/inner/ParseInModule.java @@ -7,6 +7,8 @@ import org.kframework.attributes.Source; import org.kframework.builtin.Sorts; import org.kframework.definition.Module; +import org.kframework.definition.Terminal; +import org.kframework.definition.TerminalLike; import org.kframework.kore.K; import org.kframework.kore.Sort; import org.kframework.main.GlobalOptions; @@ -16,6 +18,7 @@ import org.kframework.parser.inner.kernel.EarleyParser; import org.kframework.parser.inner.kernel.Scanner; import org.kframework.parser.outer.Outer; +import org.kframework.utils.StringUtil; import org.kframework.utils.errorsystem.KEMException; import org.kframework.utils.file.FileUtil; import scala.Tuple2; @@ -28,10 +31,9 @@ import java.io.IOException; import java.io.Serializable; import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.Queue; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; /** * A wrapper that takes a module and one can call the parser @@ -159,6 +161,52 @@ public void initialize() { } } + /** + * Print the list of tokens matched by the scanner, the location and the Regex Terminal + * The output is a valid Markdown table. + */ + public String tokenizeString(String input, Source source) { + StringBuilder sb = new StringBuilder(); + try (Scanner scanner = getScanner()) { + EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1); + Map kind2Token = + scanner.getTokens().entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey())) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2)); + List lines = mdata.getLines(); + List columns = mdata.getColumns(); + int maxTokenLen = 7, maxLocLen = 10, maxTerminalLen = 10; + List locs = new ArrayList<>(); + List tokens = new ArrayList<>(); + List terminals = new ArrayList<>(); + List words = mdata.getWords(); + for (Scanner.Token word : mdata.getWords()) { + String loc = String.format("(%d,%d,%d,%d)", + lines.get(word.startLoc), columns.get(word.startLoc), + lines.get(word.endLoc), columns.get(word.endLoc)); + locs.add(loc); + maxLocLen = Math.max(maxLocLen, loc.length()); + String tok = StringUtil.enquoteKString(word.value); + tokens.add(tok); + maxTokenLen = Math.max(maxTokenLen, tok.length()); + String terminal = kind2Token.getOrDefault(word.kind, Terminal.apply("")).toString(); + terminals.add(terminal); + maxTerminalLen = Math.max(maxTerminalLen, terminal.length()); + } + // if the token is absurdly large limit the column to 80 chars to maintain alignment + maxTokenLen = Math.min(maxTokenLen, 80); + maxTerminalLen = Math.min(maxTerminalLen, 20); + sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n", + "\"Match\"", "(location)", "Terminal")); + sb.append(String.format("|-%s|--%s|-%s|\n", "-".repeat(maxTokenLen), "-".repeat(maxLocLen), "-".repeat(maxTerminalLen))); + for (int i = 0; i < words.size(); i++) { + Scanner.Token word = words.get(i); + sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n", + tokens.get(i), locs.get(i), terminals.get(i))); + } + } + return sb.toString(); + } + public Tuple2, K>, Set> parseString(String input, Sort startSymbol, String startSymbolLocation, Source source) { try (Scanner scanner = getScanner()) { diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java index 7f48bda662e..8f3f541295e 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/EarleyParser.java @@ -1,6 +1,7 @@ // Copyright (c) K Team. All Rights Reserved. package org.kframework.parser.inner.kernel; +import com.google.common.primitives.Ints; import org.apache.commons.codec.binary.StringUtils; import org.jetbrains.annotations.NotNull; import org.kframework.attributes.Att; @@ -23,18 +24,7 @@ import org.pcollections.ConsPStack; import org.pcollections.PStack; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import static org.kframework.Collections.*; @@ -525,7 +515,7 @@ public void finish() { * Metadata about the state of the sentence being parsed. We collect this all in a single place in order to simplify * the type signatures of many methods. */ - private static class ParserMetadata { + public static class ParserMetadata { /** * @param input The sentence being parsed * @param scanner The scanner to use to tokenize the sentence @@ -583,15 +573,27 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin } // the list of tokens in the sentence - Scanner.Token[] words; + final Scanner.Token[] words; // an array containing the line of each character in the input sentence - int[] lines; + final int[] lines; // an array containing the column of each character in the input sentence - int[] columns; + final int[] columns; // a Source containing the file the sentence was parsed from Source source; // the original un-tokenized input sentence String input; + + public List getWords() { + return List.of(words); + } + + public List getLines() { + return Collections.unmodifiableList(Ints.asList(lines)); + } + + public List getColumns() { + return Collections.unmodifiableList(Ints.asList(columns)); + } } /** diff --git a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java index d59300a5b39..b71d0c44c16 100644 --- a/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java +++ b/kernel/src/main/java/org/kframework/parser/inner/kernel/Scanner.java @@ -30,14 +30,7 @@ import java.nio.ByteOrder; import java.nio.file.StandardCopyOption; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; import java.util.concurrent.Semaphore; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -57,6 +50,10 @@ public class Scanner implements AutoCloseable { public static final String COMPILER = OS.current().equals(OS.OSX) ? "clang" : "gcc"; + public Map> getTokens() { + return Collections.unmodifiableMap(tokens); + } + public static Map> getTokens(Module module) { Map tokens = new TreeMap<>(); Set terminals = new HashSet<>(); @@ -349,10 +346,10 @@ public int resolve(TerminalLike terminal) { } public static class Token { - final int kind; - final String value; - final int startLoc; - final int endLoc; + public final int kind; + public final String value; + public final int startLoc; + public final int endLoc; public Token(int kind, String value, long startLoc, long endLoc) { this.kind = kind; diff --git a/package/test-package b/package/test-package index 704750337ac..6c4ae1f0bcb 100755 --- a/package/test-package +++ b/package/test-package @@ -23,5 +23,27 @@ sort_int = kllvm.ast.CompositeSort("SortInt") assert str(sort_int) == "SortInt{}" HERE +# Make sure that the help messages work and that the version of the various components is correct +PACKAGE_VERSION=$(cat /usr/lib/kframework/version) + +kast --help | grep -q -- "--version" +kdep --help | grep -q -- "--version" +kompile --help | grep -q -- "--version" +kprove --help | grep -q -- "--version" +krun --help | grep -q -- "--version" +kserver --help | grep -q -- "--version" +kparse --help | grep -q -- "--version" +kparse-gen --help | grep -q -- "--version" +kore-print --help | grep -q -- "--version" +kast --version | grep -q ${PACKAGE_VERSION} +kdep --version | grep -q ${PACKAGE_VERSION} +kompile --version | grep -q ${PACKAGE_VERSION} +kprove --version | grep -q ${PACKAGE_VERSION} +krun --version | grep -q ${PACKAGE_VERSION} +kserver --version | grep -q ${PACKAGE_VERSION} +kparse --version | grep -q ${PACKAGE_VERSION} +kparse-gen --version | grep -q ${PACKAGE_VERSION} +kore-print --version | grep -q ${PACKAGE_VERSION} + # Make sure that the Haskell Backend Booster has been installed properly. kore-rpc-booster --help