Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kast --debug-tokens #3660

Merged
merged 16 commits into from
Oct 4, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
DEF=test
EXT=test
TESTDIR=.
KAST_FLAGS=--debug-tokens

include ../../../include/kframework/ktest.mak
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a + 100 + 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
`Match` (location), Terminal
------------------------------------
`a` (1,1,1,2), r"[a-z][a-zA-Z0-9]*"
`+` (1,3,1,4), "+"
`100` (1,5,1,8), r"[\\+-]?[0-9]+"
`+` (1,9,1,10), "+"
`2` (1,11,1,12), r"[\\+-]?[0-9]+"
`` (2,1,2,1), "<eof>"

13 changes: 13 additions & 0 deletions k-distribution/tests/regression-new/issue-3647-debugTokens/test.k
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) K Team. All Rights Reserved.

module TEST-SYNTAX
imports INT-SYNTAX
imports ID-SYNTAX
syntax Exp ::= Exp "+" Exp [left] | Int | Id
endmodule

module TEST
imports TEST-SYNTAX
configuration <k> $PGM:Exp </k>

endmodule
80 changes: 44 additions & 36 deletions kernel/src/main/java/org/kframework/kast/KastFrontEnd.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,35 +135,39 @@ public int run() {
Source source = options.source();

try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(mod, true, null)) {
Tuple2<Either<Set<KEMException>, K>, Set<KEMException>> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source);
kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet()));
if (res._1().isLeft()) {
throw res._1().left().get().iterator().next();
if (options.debugTokens)
System.out.println(parseInModule.tokenizeString(stringToParse, source));
else {
Tuple2<Either<Set<KEMException>, K>, Set<KEMException>> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source);
kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet()));
if (res._1().isLeft()) {
throw res._1().left().get().iterator().next();
}
// important to get the extension module for unparsing because it contains generated syntax
// like casts, projections and others
Module unparsingMod = parseInModule.getExtensionModule();
K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get());

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod);
LabelInfo labelInfo = new LabelInfoFromModule(mod);
SortInfo sortInfo = SortInfo.fromModule(mod);

Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty());
if (options.steps.contains(KompileSteps.anonVars))
r = (Rule) new ResolveAnonVar().resolve(r);
r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates
r = (Rule) new ConstantFolding().fold(unparsingMod, r);
r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r);
if (options.steps.contains(KompileSteps.concretizeCells)) {
r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r);
r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r);
}
K result = r.body();
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort);
}
// important to get the extension module for unparsing because it contains generated syntax
// like casts, projections and others
Module unparsingMod = parseInModule.getExtensionModule();
K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get());

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod);
LabelInfo labelInfo = new LabelInfoFromModule(mod);
SortInfo sortInfo = SortInfo.fromModule(mod);

Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty());
if (options.steps.contains(KompileSteps.anonVars))
r = (Rule) new ResolveAnonVar().resolve(r);
r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates
r = (Rule) new ConstantFolding().fold(unparsingMod, r);
r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r);
if (options.steps.contains(KompileSteps.concretizeCells)) {
r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r);
r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r);
}
K result = r.body();
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort);
}

sw.printTotal("Total");
Expand Down Expand Up @@ -210,17 +214,21 @@ public int run() {
} else {
Reader stringToParse = options.stringToParse();
Source source = options.source();
K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse));
if (options.debugTokens)
System.out.println(kread.showTokens(parsingMod, def, FileUtil.read(stringToParse), source));
else {
K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse));

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}

if (sort.equals(Sorts.K())) {
sort = Sorts.KItem();
}
if (sort.equals(Sorts.K())) {
sort = Sorts.KItem();
}

kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort);
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort);
}
}

sw.printTotal("Total");
Expand Down
3 changes: 3 additions & 0 deletions kernel/src/main/java/org/kframework/kast/KastOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,7 @@ public Class<InputModes> enumClass() {
return InputModes.class;
}
}

@Parameter(names="--debug-tokens", description="Print the list of tokens matched by the scanner. Useful for debugging parsing errors.")
public boolean debugTokens = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ public K parseSingleTerm(Module module, Sort programStartSymbol, String startSym
}
}

public String showTokens(Module module, FileUtil files, String s, Source source) {
try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) {
return parseInModule.tokenizeString(s, source);
}
}

public Module getExtensionModule(Module module, FileUtil files) {
return RuleGrammarGenerator.getCombinedGrammar(module, true, files).getExtensionModule();
}
Expand Down
4 changes: 4 additions & 0 deletions kernel/src/main/java/org/kframework/parser/KRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ public KRead(
this.globalOptions = globalOptions;
}

public String showTokens(Module mod, CompiledDefinition def, String stringToParse, Source source) {
return def.showTokens(mod, files, stringToParse, source);
}

public K prettyRead(Module mod, Sort sort, String startSymbolLocation, CompiledDefinition def, Source source, String stringToParse) {
return prettyRead(mod, sort, startSymbolLocation, def, source, stringToParse, this.input);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.kframework.attributes.Source;
import org.kframework.builtin.Sorts;
import org.kframework.definition.Module;
import org.kframework.definition.Terminal;
import org.kframework.definition.TerminalLike;
import org.kframework.kore.K;
import org.kframework.kore.Sort;
import org.kframework.main.GlobalOptions;
Expand All @@ -28,10 +30,9 @@
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Queue;
import java.util.Set;
import java.util.*;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.stream.Collectors;

/**
* A wrapper that takes a module and one can call the parser
Expand Down Expand Up @@ -159,6 +160,31 @@ public void initialize() {
}
}

/**
* Print the list of tokens matched by the scanner, the location and the Regex Terminal
*/
public String tokenizeString(String input, Source source) {
StringBuilder sb = new StringBuilder();
sb.append("`Match` (location), Terminal\n");
sb.append("------------------------------------\n");
try (Scanner scanner = getScanner()) {
EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1);
Map<Integer, TerminalLike> kind2Token =
scanner.getTokens().entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey()))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
List<Integer> lines = mdata.getLines();
List<Integer> columns = mdata.getColumns();
for (Scanner.Token word : mdata.getWords()) {
String loc = String.format("(%d,%d,%d,%d),",
lines.get(word.startLoc), columns.get(word.startLoc),
lines.get(word.endLoc), columns.get(word.endLoc));
sb.append(String.format("%-10s %-12s %s\n", '`' + word.value + '`', loc,
kind2Token.getOrDefault(word.kind, Terminal.apply("<eof>"))));
}
}
return sb.toString();
}

public Tuple2<Either<Set<KEMException>, K>, Set<KEMException>>
parseString(String input, Sort startSymbol, String startSymbolLocation, Source source) {
try (Scanner scanner = getScanner()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,7 @@
import org.pcollections.ConsPStack;
import org.pcollections.PStack;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;

import static org.kframework.Collections.*;
Expand Down Expand Up @@ -525,7 +514,7 @@ public void finish() {
* Metadata about the state of the sentence being parsed. We collect this all in a single place in order to simplify
* the type signatures of many methods.
*/
private static class ParserMetadata {
public static class ParserMetadata {
/**
* @param input The sentence being parsed
* @param scanner The scanner to use to tokenize the sentence
Expand Down Expand Up @@ -583,15 +572,27 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin
}

// the list of tokens in the sentence
Scanner.Token[] words;
final Scanner.Token[] words;
// an array containing the line of each character in the input sentence
int[] lines;
final int[] lines;
// an array containing the column of each character in the input sentence
int[] columns;
final int[] columns;
// a Source containing the file the sentence was parsed from
Source source;
// the original un-tokenized input sentence
String input;

public List<Scanner.Token> getWords() {
return List.of(words);
}

public List<Integer> getLines() {
return Arrays.stream(lines).boxed().collect(Collectors.toList());
}

public List<Integer> getColumns() {
return Arrays.stream(columns).boxed().collect(Collectors.toList());
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,7 @@
import java.nio.ByteOrder;
import java.nio.file.StandardCopyOption;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.*;
import java.util.concurrent.Semaphore;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
Expand All @@ -57,6 +50,10 @@ public class Scanner implements AutoCloseable {

public static final String COMPILER = OS.current().equals(OS.OSX) ? "clang" : "gcc";

public Map<TerminalLike, Tuple2<Integer, Integer>> getTokens() {
return Collections.unmodifiableMap(tokens);
}

public static Map<TerminalLike, Tuple2<Integer, Integer>> getTokens(Module module) {
Map<TerminalLike, Integer> tokens = new TreeMap<>();
Set<String> terminals = new HashSet<>();
Expand Down Expand Up @@ -349,10 +346,10 @@ public int resolve(TerminalLike terminal) {
}

public static class Token {
final int kind;
final String value;
final int startLoc;
final int endLoc;
public final int kind;
public final String value;
public final int startLoc;
public final int endLoc;

public Token(int kind, String value, long startLoc, long endLoc) {
this.kind = kind;
Expand Down