Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
devops committed Oct 4, 2023
2 parents 7df821a + a49c3a2 commit bbfc23f
Show file tree
Hide file tree
Showing 13 changed files with 194 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
DEF=test
EXT=test
TESTDIR=.
KAST_FLAGS=--debug-tokens

include ../../../include/kframework/ktest.mak
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
1 + 2 + aaaaaaaaaaaa










+ 10000000
+ "str"
+ "long str that breaks alighnment"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
|"Match" | (location) | Terminal |
|---------------------------------------------------------------------------------|---------------|---------------------|
|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" |
|"+" | (1,3,1,4) | "+" |
|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" |
|"+" | (1,7,1,8) | "+" |
|"aaaaaaaaaaaa" | (1,9,1,21) | r"[a-z][a-zA-Z0-9]*"|
|"+" | (12,1,12,2) | "+" |
|"10000000" | (12,3,12,11) | r"[\\+-]?[0-9]+" |
|"+" | (13,1,13,2) | "+" |
|"\"str\"" | (13,3,13,8) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"|
|"+" | (14,1,14,2) | "+" |
|"\"long str that breaks alighnment\"" | (14,3,14,103) | r"[\\\"](([^\\\"\\n\\r\\\\])|([\\\\][nrtf\\\"\\\\])|([\\\\][x][0-9a-fA-F]{2})|([\\\\][u][0-9a-fA-F]{4})|([\\\\][U][0-9a-fA-F]{8}))*[\\\"]"|
|"" | (15,1,15,1) | "<eof>" |

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 + 2 + aaa
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
|"Match" | (location) | Terminal |
|--------|------------|---------------------|
|"1" | (1,1,1,2) | r"[\\+-]?[0-9]+" |
|"+" | (1,3,1,4) | "+" |
|"2" | (1,5,1,6) | r"[\\+-]?[0-9]+" |
|"+" | (1,7,1,8) | "+" |
|"aaa" | (1,9,1,12) | r"[a-z][a-zA-Z0-9]*"|
|"" | (2,1,2,1) | "<eof>" |

14 changes: 14 additions & 0 deletions k-distribution/tests/regression-new/issue-3647-debugTokens/test.k
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (c) K Team. All Rights Reserved.

module TEST-SYNTAX
imports INT-SYNTAX
imports ID-SYNTAX
imports STRING-SYNTAX
syntax Exp ::= Exp "+" Exp [left] | Int | Id | String
endmodule

module TEST
imports TEST-SYNTAX
configuration <k> $PGM:Exp </k>

endmodule
80 changes: 44 additions & 36 deletions kernel/src/main/java/org/kframework/kast/KastFrontEnd.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,35 +135,39 @@ public int run() {
Source source = options.source();

try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(mod, true, null)) {
Tuple2<Either<Set<KEMException>, K>, Set<KEMException>> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source);
kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet()));
if (res._1().isLeft()) {
throw res._1().left().get().iterator().next();
if (options.debugTokens)
System.out.println(parseInModule.tokenizeString(stringToParse, source));
else {
Tuple2<Either<Set<KEMException>, K>, Set<KEMException>> res = parseInModule.parseString(stringToParse, sort, startSymbolLocation, source);
kem.addAllKException(res._2().stream().map(KEMException::getKException).collect(Collectors.toSet()));
if (res._1().isLeft()) {
throw res._1().left().get().iterator().next();
}
// important to get the extension module for unparsing because it contains generated syntax
// like casts, projections and others
Module unparsingMod = parseInModule.getExtensionModule();
K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get());

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod);
LabelInfo labelInfo = new LabelInfoFromModule(mod);
SortInfo sortInfo = SortInfo.fromModule(mod);

Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty());
if (options.steps.contains(KompileSteps.anonVars))
r = (Rule) new ResolveAnonVar().resolve(r);
r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates
r = (Rule) new ConstantFolding().fold(unparsingMod, r);
r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r);
if (options.steps.contains(KompileSteps.concretizeCells)) {
r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r);
r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r);
}
K result = r.body();
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort);
}
// important to get the extension module for unparsing because it contains generated syntax
// like casts, projections and others
Module unparsingMod = parseInModule.getExtensionModule();
K parsed = new TreeNodesToKORE(Outer::parseSort, true).down(res._1().right().get());

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
ConfigurationInfoFromModule configInfo = new ConfigurationInfoFromModule(mod);
LabelInfo labelInfo = new LabelInfoFromModule(mod);
SortInfo sortInfo = SortInfo.fromModule(mod);

Rule r = Rule.apply(parsed, BooleanUtils.TRUE, BooleanUtils.TRUE, Att.empty());
if (options.steps.contains(KompileSteps.anonVars))
r = (Rule) new ResolveAnonVar().resolve(r);
r = (Rule) new ResolveSemanticCasts(false).resolve(r); // move casts to side condition predicates
r = (Rule) new ConstantFolding().fold(unparsingMod, r);
r = (Rule) new CloseCells(configInfo, sortInfo, labelInfo).close(r);
if (options.steps.contains(KompileSteps.concretizeCells)) {
r = (Rule) new AddImplicitComputationCell(configInfo, labelInfo).apply(mod, r);
r = (Rule) new ConcretizeCells(configInfo, labelInfo, sortInfo, mod).concretize(mod, r);
}
K result = r.body();
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), result, sort);
}

sw.printTotal("Total");
Expand Down Expand Up @@ -210,17 +214,21 @@ public int run() {
} else {
Reader stringToParse = options.stringToParse();
Source source = options.source();
K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse));
if (options.debugTokens)
System.out.println(kread.showTokens(parsingMod, def, FileUtil.read(stringToParse), source));
else {
K parsed = kread.prettyRead(parsingMod, sort, startSymbolLocation, def, source, FileUtil.read(stringToParse));

if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}
if (options.expandMacros) {
parsed = ExpandMacros.forNonSentences(unparsingMod, files.get(), def.kompileOptions, false).expand(parsed);
}

if (sort.equals(Sorts.K())) {
sort = Sorts.KItem();
}
if (sort.equals(Sorts.K())) {
sort = Sorts.KItem();
}

kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort);
kprint.get().prettyPrint(def.kompiledDefinition, unparsingMod, s -> kprint.get().outputFile(s), parsed, sort);
}
}

sw.printTotal("Total");
Expand Down
3 changes: 3 additions & 0 deletions kernel/src/main/java/org/kframework/kast/KastOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,7 @@ public Class<InputModes> enumClass() {
return InputModes.class;
}
}

@Parameter(names="--debug-tokens", description="Print a Markdown table of tokens matched by the scanner. Useful for debugging parsing errors.")
public boolean debugTokens = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ public K parseSingleTerm(Module module, Sort programStartSymbol, String startSym
}
}

public String showTokens(Module module, FileUtil files, String s, Source source) {
try (ParseInModule parseInModule = RuleGrammarGenerator.getCombinedGrammar(module, true, files)) {
return parseInModule.tokenizeString(s, source);
}
}

public Module getExtensionModule(Module module, FileUtil files) {
return RuleGrammarGenerator.getCombinedGrammar(module, true, files).getExtensionModule();
}
Expand Down
4 changes: 4 additions & 0 deletions kernel/src/main/java/org/kframework/parser/KRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ public KRead(
this.globalOptions = globalOptions;
}

public String showTokens(Module mod, CompiledDefinition def, String stringToParse, Source source) {
return def.showTokens(mod, files, stringToParse, source);
}

public K prettyRead(Module mod, Sort sort, String startSymbolLocation, CompiledDefinition def, Source source, String stringToParse) {
return prettyRead(mod, sort, startSymbolLocation, def, source, stringToParse, this.input);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.kframework.attributes.Source;
import org.kframework.builtin.Sorts;
import org.kframework.definition.Module;
import org.kframework.definition.Terminal;
import org.kframework.definition.TerminalLike;
import org.kframework.kore.K;
import org.kframework.kore.Sort;
import org.kframework.main.GlobalOptions;
Expand All @@ -16,6 +18,7 @@
import org.kframework.parser.inner.kernel.EarleyParser;
import org.kframework.parser.inner.kernel.Scanner;
import org.kframework.parser.outer.Outer;
import org.kframework.utils.StringUtil;
import org.kframework.utils.errorsystem.KEMException;
import org.kframework.utils.file.FileUtil;
import scala.Tuple2;
Expand All @@ -28,10 +31,9 @@
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Queue;
import java.util.Set;
import java.util.*;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.stream.Collectors;

/**
* A wrapper that takes a module and one can call the parser
Expand Down Expand Up @@ -159,6 +161,52 @@ public void initialize() {
}
}

/**
* Print the list of tokens matched by the scanner, the location and the Regex Terminal
* The output is a valid Markdown table.
*/
public String tokenizeString(String input, Source source) {
StringBuilder sb = new StringBuilder();
try (Scanner scanner = getScanner()) {
EarleyParser.ParserMetadata mdata = new EarleyParser.ParserMetadata(input, scanner, source, 1, 1);
Map<Integer, TerminalLike> kind2Token =
scanner.getTokens().entrySet().stream().map(a -> new Tuple2<>(a.getValue()._1, a.getKey()))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
List<Integer> lines = mdata.getLines();
List<Integer> columns = mdata.getColumns();
int maxTokenLen = 7, maxLocLen = 10, maxTerminalLen = 10;
List<String> locs = new ArrayList<>();
List<String> tokens = new ArrayList<>();
List<String> terminals = new ArrayList<>();
List<Scanner.Token> words = mdata.getWords();
for (Scanner.Token word : mdata.getWords()) {
String loc = String.format("(%d,%d,%d,%d)",
lines.get(word.startLoc), columns.get(word.startLoc),
lines.get(word.endLoc), columns.get(word.endLoc));
locs.add(loc);
maxLocLen = Math.max(maxLocLen, loc.length());
String tok = StringUtil.enquoteKString(word.value);
tokens.add(tok);
maxTokenLen = Math.max(maxTokenLen, tok.length());
String terminal = kind2Token.getOrDefault(word.kind, Terminal.apply("<eof>")).toString();
terminals.add(terminal);
maxTerminalLen = Math.max(maxTerminalLen, terminal.length());
}
// if the token is absurdly large limit the column to 80 chars to maintain alignment
maxTokenLen = Math.min(maxTokenLen, 80);
maxTerminalLen = Math.min(maxTerminalLen, 20);
sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n",
"\"Match\"", "(location)", "Terminal"));
sb.append(String.format("|-%s|--%s|-%s|\n", "-".repeat(maxTokenLen), "-".repeat(maxLocLen), "-".repeat(maxTerminalLen)));
for (int i = 0; i < words.size(); i++) {
Scanner.Token word = words.get(i);
sb.append(String.format("|%-" + maxTokenLen + "s | %-" + maxLocLen + "s | %-" + maxTerminalLen + "s|\n",
tokens.get(i), locs.get(i), terminals.get(i)));
}
}
return sb.toString();
}

public Tuple2<Either<Set<KEMException>, K>, Set<KEMException>>
parseString(String input, Sort startSymbol, String startSymbolLocation, Source source) {
try (Scanner scanner = getScanner()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) K Team. All Rights Reserved.
package org.kframework.parser.inner.kernel;

import com.google.common.primitives.Ints;
import org.apache.commons.codec.binary.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.kframework.attributes.Att;
Expand All @@ -23,18 +24,7 @@
import org.pcollections.ConsPStack;
import org.pcollections.PStack;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;

import static org.kframework.Collections.*;
Expand Down Expand Up @@ -525,7 +515,7 @@ public void finish() {
* Metadata about the state of the sentence being parsed. We collect this all in a single place in order to simplify
* the type signatures of many methods.
*/
private static class ParserMetadata {
public static class ParserMetadata {
/**
* @param input The sentence being parsed
* @param scanner The scanner to use to tokenize the sentence
Expand Down Expand Up @@ -583,15 +573,27 @@ public ParserMetadata(String input, Scanner scanner, Source source, int startLin
}

// the list of tokens in the sentence
Scanner.Token[] words;
final Scanner.Token[] words;
// an array containing the line of each character in the input sentence
int[] lines;
final int[] lines;
// an array containing the column of each character in the input sentence
int[] columns;
final int[] columns;
// a Source containing the file the sentence was parsed from
Source source;
// the original un-tokenized input sentence
String input;

public List<Scanner.Token> getWords() {
return List.of(words);
}

public List<Integer> getLines() {
return Collections.unmodifiableList(Ints.asList(lines));
}

public List<Integer> getColumns() {
return Collections.unmodifiableList(Ints.asList(columns));
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,7 @@
import java.nio.ByteOrder;
import java.nio.file.StandardCopyOption;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.*;
import java.util.concurrent.Semaphore;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
Expand All @@ -57,6 +50,10 @@ public class Scanner implements AutoCloseable {

public static final String COMPILER = OS.current().equals(OS.OSX) ? "clang" : "gcc";

public Map<TerminalLike, Tuple2<Integer, Integer>> getTokens() {
return Collections.unmodifiableMap(tokens);
}

public static Map<TerminalLike, Tuple2<Integer, Integer>> getTokens(Module module) {
Map<TerminalLike, Integer> tokens = new TreeMap<>();
Set<String> terminals = new HashSet<>();
Expand Down Expand Up @@ -349,10 +346,10 @@ public int resolve(TerminalLike terminal) {
}

public static class Token {
final int kind;
final String value;
final int startLoc;
final int endLoc;
public final int kind;
public final String value;
public final int startLoc;
public final int endLoc;

public Token(int kind, String value, long startLoc, long endLoc) {
this.kind = kind;
Expand Down

0 comments on commit bbfc23f

Please sign in to comment.