From 0f73cc0ba72ce38a63c616cc7c71f325cbfd867c Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 16 Jul 2024 11:08:35 +0200 Subject: [PATCH 1/8] Run token sequence normalization without topological sorting whenever match merging is enabled to prevent interference. --- core/src/main/java/de/jplag/JPlag.java | 3 +- core/src/main/java/de/jplag/Submission.java | 7 +-- .../src/main/java/de/jplag/SubmissionSet.java | 12 ++++-- ...izer.java => TokenSequenceNormalizer.java} | 43 ++++++++++++++----- .../test/java/de/jplag/NormalizationTest.java | 27 +++++++++--- 5 files changed, 67 insertions(+), 25 deletions(-) rename core/src/main/java/de/jplag/normalization/{TokenStringNormalizer.java => TokenSequenceNormalizer.java} (63%) diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java index 41f1c08c8e..573333ebd4 100644 --- a/core/src/main/java/de/jplag/JPlag.java +++ b/core/src/main/java/de/jplag/JPlag.java @@ -72,7 +72,8 @@ public static JPlagResult run(JPlagOptions options) throws ExitException { SubmissionSetBuilder builder = new SubmissionSetBuilder(options); SubmissionSet submissionSet = builder.buildSubmissionSet(); if (options.normalize() && options.language().supportsNormalization() && options.language().requiresCoreNormalization()) { - submissionSet.normalizeSubmissions(); + boolean normalizeOrder = !options.mergingOptions().enabled(); // match merging conflicts with sorting + submissionSet.normalizeSubmissions(normalizeOrder); } int submissionCount = submissionSet.numberOfSubmissions(); if (submissionCount < 2) { diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 92c8fd5c8d..39df988662 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -24,7 +24,7 @@ import org.slf4j.LoggerFactory; import de.jplag.exceptions.LanguageException; -import de.jplag.normalization.TokenStringNormalizer; +import 
de.jplag.normalization.TokenSequenceNormalizer; import de.jplag.options.JPlagOptions; /** @@ -256,10 +256,11 @@ private static File createErrorDirectory(String... subdirectoryNames) { /** * Perform token sequence normalization, which makes the token sequence invariant to dead code insertion and independent * statement reordering. + * @param sorting determines whether to perform topological sorting during normalization. */ - void normalize() { + void normalize(boolean sorting) { List originalOrder = getOrder(tokenList); - tokenList = TokenStringNormalizer.normalize(tokenList); + tokenList = TokenSequenceNormalizer.normalize(tokenList, sorting); List normalizedOrder = getOrder(tokenList); logger.debug("original line order: {}", originalOrder); diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index f7c1438bbb..8153dd0394 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -99,11 +99,17 @@ public List getInvalidSubmissions() { return invalidSubmissions; } - public void normalizeSubmissions() { + /** + * Normalizes the token sequences of all submissions (including basecode). This makes the token sequence invariant to + * dead code insertion and independent statement reordering by removing dead tokens and optionally reordering tokens to + * a deterministic order. + * @param sorting determines whether to perform topological sorting during normalization. 
+ */ + public void normalizeSubmissions(boolean sorting) { if (baseCodeSubmission != null) { - baseCodeSubmission.normalize(); + baseCodeSubmission.normalize(sorting); } - ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, Submission::normalize); + ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, submission -> submission.normalize(sorting)); } private List filterValidSubmissions() { diff --git a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java similarity index 63% rename from core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java rename to core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index 8ffafffbf7..cc0b4c2be0 100644 --- a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -16,24 +16,34 @@ /** * Performs token sequence normalization. */ -public class TokenStringNormalizer { +public class TokenSequenceNormalizer { - private TokenStringNormalizer() { + private TokenSequenceNormalizer() { } /** * Performs token sequence normalization. Tokens representing dead code have been eliminated and tokens representing - * subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph - * and then turning it back into a token sequence. + * subsequent independent statements have been put in a fixed order if sorting is true. Works by first constructing a + * Normalization Graph and then turning it back into a token sequence. * @param tokens The original token sequence, remains unaltered. - * @return The normalized token sequence as unmodifiable list. + * @param sorting Boolean flag to control if the tokens should be topologically sorted. + * @return The normalized token sequence. 
*/ - public static List normalize(List tokens) { + public static List normalize(List tokens, boolean sorting) { SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); + propagateKeepStatus(normalizationGraph); + if (sorting) { + return normalizeWithSorting(tokens, normalizationGraph); + } else { + return normalizeWithoutSorting(normalizationGraph, tokens); + } + + } + + // Add tokens in normalized original order, removing dead tokens + private static List normalizeWithSorting(List tokens, SimpleDirectedGraph normalizationGraph) { List normalizedTokens = new ArrayList<>(tokens.size()); - spreadKeep(normalizationGraph); - PriorityQueue roots = normalizationGraph.vertexSet().stream() // - .filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) // + PriorityQueue roots = normalizationGraph.vertexSet().stream().filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) .collect(Collectors.toCollection(PriorityQueue::new)); while (!roots.isEmpty()) { PriorityQueue newRoots = new PriorityQueue<>(); @@ -51,13 +61,24 @@ public static List normalize(List tokens) { } while (!roots.isEmpty()); roots = newRoots; } - return Collections.unmodifiableList(normalizedTokens); + return normalizedTokens; + } + + // Add tokens in the original order, removing dead tokens + private static List normalizeWithoutSorting(SimpleDirectedGraph normalizationGraph, List tokens) { + List normalizedTokens = new ArrayList<>(tokens.size()); + for (Statement statement : normalizationGraph.vertexSet()) { + if (statement.semantics().keep()) { + normalizedTokens.addAll(statement.tokens()); + } + } + return normalizedTokens; } /** * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. 
*/ - private static void spreadKeep(SimpleDirectedGraph normalizationGraph) { + private static void propagateKeepStatus(SimpleDirectedGraph normalizationGraph) { Queue visit = new LinkedList<>(normalizationGraph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { diff --git a/core/src/test/java/de/jplag/NormalizationTest.java b/core/src/test/java/de/jplag/NormalizationTest.java index c6a9db9ed1..324081ae8b 100644 --- a/core/src/test/java/de/jplag/NormalizationTest.java +++ b/core/src/test/java/de/jplag/NormalizationTest.java @@ -6,37 +6,50 @@ import java.util.stream.Collectors; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import de.jplag.exceptions.ExitException; import de.jplag.options.JPlagOptions; class NormalizationTest extends TestBase { - private final Map> tokenStringMap; - private final List originalTokenString; + private Map> tokenStringMap; + private List originalTokenString; + private SubmissionSet submissionSet; - NormalizationTest() throws ExitException { + @BeforeEach + void setUp() throws ExitException { JPlagOptions options = getDefaultOptions("normalization"); SubmissionSetBuilder builder = new SubmissionSetBuilder(options); - SubmissionSet submissionSet = builder.buildSubmissionSet(); - submissionSet.normalizeSubmissions(); + submissionSet = builder.buildSubmissionSet(); + + } + + private void normalizeSubmissions(boolean sorting) { + submissionSet.normalizeSubmissions(sorting); Function> getTokenString = submission -> submission.getTokenList().stream().map(Token::getType).toList(); tokenStringMap = submissionSet.getSubmissions().stream().collect(Collectors.toMap(Submission::getName, getTokenString)); originalTokenString = tokenStringMap.get("Squares.java"); } - @Test - void testInsertionNormalization() { + 
@ParameterizedTest + @ValueSource(booleans = {true, false}) + void testInsertionNormalization(boolean sorting) { + normalizeSubmissions(sorting); Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInserted.java")); } @Test void testReorderingNormalization() { + normalizeSubmissions(true); Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresReordered.java")); } @Test void testInsertionReorderingNormalization() { + normalizeSubmissions(true); Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInsertedReordered.java")); } } From 793007cf339b07eea57c003b16b8a4b6b179b914 Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 16 Jul 2024 11:14:36 +0200 Subject: [PATCH 2/8] Remove unused import. --- .../java/de/jplag/normalization/TokenSequenceNormalizer.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index cc0b4c2be0..a5638dd6de 100644 --- a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -1,7 +1,6 @@ package de.jplag.normalization; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; From d0f13a85be4637ec3423d3282d43ab8a77d2c663 Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 16 Jul 2024 13:19:51 +0200 Subject: [PATCH 3/8] Extend normalization test cases. 
--- .../jplag/normalization/TokenSequenceNormalizer.java | 4 +--- core/src/test/java/de/jplag/NormalizationTest.java | 11 ++++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index a5638dd6de..1b76f3a7dd 100644 --- a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -33,10 +33,8 @@ public static List normalize(List tokens, boolean sorting) { propagateKeepStatus(normalizationGraph); if (sorting) { return normalizeWithSorting(tokens, normalizationGraph); - } else { - return normalizeWithoutSorting(normalizationGraph, tokens); } - + return normalizeWithoutSorting(normalizationGraph, tokens); } // Add tokens in normalized original order, removing dead tokens diff --git a/core/src/test/java/de/jplag/NormalizationTest.java b/core/src/test/java/de/jplag/NormalizationTest.java index 324081ae8b..3de5dcb480 100644 --- a/core/src/test/java/de/jplag/NormalizationTest.java +++ b/core/src/test/java/de/jplag/NormalizationTest.java @@ -24,9 +24,9 @@ void setUp() throws ExitException { JPlagOptions options = getDefaultOptions("normalization"); SubmissionSetBuilder builder = new SubmissionSetBuilder(options); submissionSet = builder.buildSubmissionSet(); - } + // normalize submission set and initialize fields private void normalizeSubmissions(boolean sorting) { submissionSet.normalizeSubmissions(sorting); Function> getTokenString = submission -> submission.getTokenList().stream().map(Token::getType).toList(); @@ -41,10 +41,11 @@ void testInsertionNormalization(boolean sorting) { Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInserted.java")); } - @Test - void testReorderingNormalization() { - normalizeSubmissions(true); - Assertions.assertIterableEquals(originalTokenString, 
tokenStringMap.get("SquaresReordered.java")); + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testReorderingNormalization(boolean sorting) { + normalizeSubmissions(sorting); + Assertions.assertEquals(sorting, originalTokenString.equals(tokenStringMap.get("SquaresReordered.java"))); } @Test From c3e8703120129a738f0304ba463f62cae33c30fe Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 16 Jul 2024 13:23:17 +0200 Subject: [PATCH 4/8] Minor code quality improvements. --- .../java/de/jplag/normalization/TokenSequenceNormalizer.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index 1b76f3a7dd..7c24493e33 100644 --- a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -15,9 +15,10 @@ /** * Performs token sequence normalization. */ -public class TokenSequenceNormalizer { +public final class TokenSequenceNormalizer { private TokenSequenceNormalizer() { + // private constructor for non-instantiability. } /** From 46744f21715294861b725ea669b18a8916d0128d Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Wed, 17 Jul 2024 16:35:10 +0200 Subject: [PATCH 5/8] Refactor token sequence normalization completely to improve code quality. 
--- .../de/jplag/normalization/MultipleEdge.java | 2 +- ...nstructor.java => NormalizationGraph.java} | 28 ++++--- .../de/jplag/normalization/Statement.java | 11 ++- .../jplag/normalization/StatementBuilder.java | 4 + .../TokenSequenceNormalizer.java | 40 +++++---- .../de/jplag/semantics/CodeSemantics.java | 82 +++++++++---------- 6 files changed, 92 insertions(+), 75 deletions(-) rename core/src/main/java/de/jplag/normalization/{NormalizationGraphConstructor.java => NormalizationGraph.java} (87%) diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java index b10fda2ea5..732d3d3cf2 100644 --- a/core/src/main/java/de/jplag/normalization/MultipleEdge.java +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java @@ -6,7 +6,7 @@ import de.jplag.semantics.Variable; /** - * Models a multiple edge in the normalization graph. Contains multiple edges. + * Models multiple edges between two nodes in the normalization graph. */ class MultipleEdge { private final Set edges; diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java similarity index 87% rename from core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java rename to core/src/main/java/de/jplag/normalization/NormalizationGraph.java index fc995e69d7..1eba1eefe4 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -14,10 +14,14 @@ import de.jplag.semantics.Variable; /** - * Constructs the normalization graph. + * Token normalization graph, which is a directed graph based on nodes of type {@link Statement} and edges of type + * {@link MultipleEdge}. This class inherits from {@link SimpleDirectedGraph} to provide a data structure for the + * token sequence normalization. 
*/ -class NormalizationGraphConstructor { - private final SimpleDirectedGraph graph; +public class NormalizationGraph extends SimpleDirectedGraph { + + private static final long serialVersionUID = -8407465274643809647L; // generated + private int bidirectionalBlockDepth; private final Collection fullPositionSignificanceIncoming; private Statement lastFullPositionSignificance; @@ -27,8 +31,11 @@ class NormalizationGraphConstructor { private final Set inCurrentBidirectionalBlock; private Statement current; - NormalizationGraphConstructor(List tokens) { - graph = new SimpleDirectedGraph<>(MultipleEdge.class); + /** + * Creates a new normalization graph. + */ + public NormalizationGraph(List tokens) { + super(MultipleEdge.class); bidirectionalBlockDepth = 0; fullPositionSignificanceIncoming = new ArrayList<>(); variableReads = new HashMap<>(); @@ -45,12 +52,8 @@ class NormalizationGraphConstructor { addStatement(builderForCurrent.build()); } - SimpleDirectedGraph get() { - return graph; - } - private void addStatement(Statement statement) { - graph.addVertex(statement); + addVertex(statement); this.current = statement; processBidirectionalBlock(); processFullPositionSignificance(); @@ -123,10 +126,10 @@ private void processWrites() { * @param cause the variable that caused the edge, may be null */ private void addIncomingEdgeToCurrent(Statement start, EdgeType type, Variable cause) { - MultipleEdge multipleEdge = graph.getEdge(start, current); + MultipleEdge multipleEdge = getEdge(start, current); if (multipleEdge == null) { multipleEdge = new MultipleEdge(); - graph.addEdge(start, current, multipleEdge); + addEdge(start, current, multipleEdge); } multipleEdge.addEdge(type, cause); } @@ -135,4 +138,5 @@ private void addVariableToMap(Map> variableMap, variableMap.putIfAbsent(variable, new ArrayList<>()); variableMap.get(variable).add(current); } + } diff --git a/core/src/main/java/de/jplag/normalization/Statement.java 
b/core/src/main/java/de/jplag/normalization/Statement.java index a749a57740..81f9b33640 100644 --- a/core/src/main/java/de/jplag/normalization/Statement.java +++ b/core/src/main/java/de/jplag/normalization/Statement.java @@ -8,7 +8,7 @@ import de.jplag.semantics.CodeSemantics; /** - * Models statements, which are the nodes of the normalization graph. + * Models statements, which are the nodes of the normalization graph. A statement refers to one or more tokens. */ class Statement implements Comparable { @@ -16,6 +16,11 @@ class Statement implements Comparable { private final int lineNumber; private final CodeSemantics semantics; + /** + * Constructs a new Statement. + * @param tokens the list of tokens that represent this statement. + * @param lineNumber the line number where this statement occurs in the source code. + */ Statement(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; @@ -30,8 +35,8 @@ CodeSemantics semantics() { return semantics; } - void markKeep() { - semantics.markKeep(); + void markAsCritical() { + semantics.markAsCritical(); } private int tokenOrdinal(Token token) { diff --git a/core/src/main/java/de/jplag/normalization/StatementBuilder.java b/core/src/main/java/de/jplag/normalization/StatementBuilder.java index eef5d0c821..f9f3bd5008 100644 --- a/core/src/main/java/de/jplag/normalization/StatementBuilder.java +++ b/core/src/main/java/de/jplag/normalization/StatementBuilder.java @@ -13,6 +13,10 @@ class StatementBuilder { private final List tokens; private final int lineNumber; + /** + * Constructs a new StatementBuilder. + * @param lineNumber the line number where the statement starts in the source code. 
+ */ StatementBuilder(int lineNumber) { this.lineNumber = lineNumber; this.tokens = new ArrayList<>(); diff --git a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index 7c24493e33..00b2aee34c 100644 --- a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -8,7 +8,6 @@ import java.util.stream.Collectors; import org.jgrapht.Graphs; -import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; @@ -24,30 +23,32 @@ private TokenSequenceNormalizer() { /** * Performs token sequence normalization. Tokens representing dead code have been eliminated and tokens representing * subsequent independent statements have been put in a fixed order if sorting is true. Works by first constructing a - * Normalization Graph and then turning it back into a token sequence. + * Normalization Graph and then turning it back into a token sequence. For more information refer to the + * corresponding paper * @param tokens The original token sequence, remains unaltered. * @param sorting Boolean flag to control if the tokens should be topologically sorted. * @return The normalized token sequence. 
*/ public static List normalize(List tokens, boolean sorting) { - SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); - propagateKeepStatus(normalizationGraph); + NormalizationGraph graph = new NormalizationGraph(tokens); + propagateCriticalityStatus(graph); if (sorting) { - return normalizeWithSorting(tokens, normalizationGraph); + return normalizeWithSorting(tokens, graph); } - return normalizeWithoutSorting(normalizationGraph, tokens); + return normalizeWithoutSorting(tokens, graph); } // Add tokens in normalized original order, removing dead tokens - private static List normalizeWithSorting(List tokens, SimpleDirectedGraph normalizationGraph) { + private static List normalizeWithSorting(List tokens, NormalizationGraph normalizationGraph) { List normalizedTokens = new ArrayList<>(tokens.size()); - PriorityQueue roots = normalizationGraph.vertexSet().stream().filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) + PriorityQueue roots = normalizationGraph.vertexSet().stream() // + .filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); while (!roots.isEmpty()) { PriorityQueue newRoots = new PriorityQueue<>(); do { Statement statement = roots.poll(); - if (statement.semantics().keep()) { + if (statement.semantics().isCritical()) { normalizedTokens.addAll(statement.tokens()); } for (Statement successor : Graphs.successorListOf(normalizationGraph, statement)) { @@ -63,10 +64,10 @@ private static List normalizeWithSorting(List tokens, SimpleDirect } // Add tokens in the original order, removing dead tokens - private static List normalizeWithoutSorting(SimpleDirectedGraph normalizationGraph, List tokens) { + private static List normalizeWithoutSorting(List tokens, NormalizationGraph normalizationGraph) { List normalizedTokens = new ArrayList<>(tokens.size()); for (Statement statement : normalizationGraph.vertexSet()) { - if 
(statement.semantics().keep()) { + if (statement.semantics().isCritical()) { normalizedTokens.addAll(statement.tokens()); } } @@ -74,22 +75,25 @@ private static List normalizeWithoutSorting(SimpleDirectedGraph normalizationGraph) { + private static void propagateCriticalityStatus(NormalizationGraph normalizationGraph) { Queue visit = new LinkedList<>(normalizationGraph.vertexSet().stream() // - .filter(tl -> tl.semantics().keep()).toList()); + .filter(tl -> tl.semantics().isCritical()).toList()); while (!visit.isEmpty()) { Statement current = visit.remove(); for (Statement predecessor : Graphs.predecessorListOf(normalizationGraph, current)) { // performance of iteration? - if (!predecessor.semantics().keep() && normalizationGraph.getEdge(predecessor, current).isVariableFlow()) { - predecessor.markKeep(); + if (!predecessor.semantics().isCritical() && normalizationGraph.getEdge(predecessor, current).isVariableFlow()) { + predecessor.markAsCritical(); visit.add(predecessor); } } for (Statement successor : Graphs.successorListOf(normalizationGraph, current)) { - if (!successor.semantics().keep() && normalizationGraph.getEdge(current, successor).isVariableReverseFlow()) { - successor.markKeep(); + if (!successor.semantics().isCritical() && normalizationGraph.getEdge(current, successor).isVariableReverseFlow()) { + successor.markAsCritical(); visit.add(successor); } } diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 2eb99262d5..7da3304a92 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -7,11 +7,11 @@ import java.util.Set; /** - * Contains semantic information about a code snippet, in our case either a token or a statement. + * Contains semantic information about a code fragment, in our case either a token or a statement. 
*/ public class CodeSemantics { - private boolean keep; + private boolean critical; private PositionSignificance positionSignificance; private final int bidirectionalBlockDepthChange; private final Set reads; @@ -19,47 +19,47 @@ public class CodeSemantics { /** * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written - * to in this code snippet, are created empty. - * @param keep Whether the code snippet must be kept or if it may be removed. - * @param positionSignificance In which way the position of the code snippet relative to other code snippets of the same - * type is significant. For the possible options see {@link PositionSignificance}. - * @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks + * to in this code fragment, are created empty. + * @param critical Whether the code fragment must be kept as it affects the program behavior or if it may be removed. + * @param positionSignificance In which way the position of the code fragment relative to other tokens of the same type + * is significant. For the possible options see {@link PositionSignificance}. + * @param bidirectionalBlockDepthChange How the code fragment affects the depth of bidirectional blocks, meaning blocks * where any statement within it may be executed after any other. This will typically be a loop. - * @param reads A set of the variables which were (potentially) read from in the code snippet. - * @param writes A set of the variables which were (potentially) written to in the code snippet. + * @param reads A set of the variables which were (potentially) read from in the code fragment. + * @param writes A set of the variables which were (potentially) written to in the code fragment. 
*/ - private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, + private CodeSemantics(boolean critical, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, Set writes) { - this.keep = keep; + this.critical = critical; this.positionSignificance = positionSignificance; this.bidirectionalBlockDepthChange = bidirectionalBlockDepthChange; this.reads = reads; this.writes = writes; } - private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange) { - this(keep, positionSignificance, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); + private CodeSemantics(boolean critical, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange) { + this(critical, positionSignificance, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); } /** - * Creates new semantics with the following meaning: The code snippet may be removed, and its position relative to other - * code snippets may change. Example: An assignment to a local variable. + * Creates new semantics with the following meaning: The code fragment may be removed, and its position relative to + * other code fragments may change. Example: An assignment to a local variable. */ public CodeSemantics() { this(false, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position relative to - * other code snippets may change. Example: An attribute declaration. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position relative to + * other code fragments may change. Example: An attribute declaration. 
*/ public static CodeSemantics createKeep() { return new CodeSemantics(true, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to other code snippets of the same type. Example: A method call which is guaranteed to not result in an + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to other code fragments of the same type. Example: A method call which is guaranteed to not result in an * exception. */ public static CodeSemantics createCritical() { @@ -67,16 +67,16 @@ public static CodeSemantics createCritical() { } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets. Example: A return statement. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments. Example: A return statement. */ public static CodeSemantics createControl() { return new CodeSemantics(true, PositionSignificance.FULL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets, which also begins a bidirectional block. Example: The beginning of a while + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments, which also begins a bidirectional block. Example: The beginning of a while * loop. 
*/ public static CodeSemantics createLoopBegin() { @@ -84,71 +84,71 @@ public static CodeSemantics createLoopBegin() { } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments, which also ends a bidirectional block. Example: The end of a while loop. */ public static CodeSemantics createLoopEnd() { return new CodeSemantics(true, PositionSignificance.FULL, -1); } /** - * @return whether this code snippet must be kept. + * @return whether this token is critical to the program behavior. */ - public boolean keep() { - return keep; + public boolean isCritical() { + return critical; } /** - * Mark this code snippet as having to be kept. + * Mark this token as critical to the program behavior. */ - public void markKeep() { - keep = true; + public void markAsCritical() { + critical = true; } /** - * @return the change this code snippet causes in the depth of bidirectional loops. + * @return the change this code fragment causes in the depth of bidirectional loops. */ public int bidirectionalBlockDepthChange() { return bidirectionalBlockDepthChange; } /** - * @return whether this code snippet has partial position significance. + * @return whether this code fragment has partial position significance. */ public boolean hasPartialPositionSignificance() { return positionSignificance == PositionSignificance.PARTIAL; } /** - * @return whether this code snippet has full position significance. + * @return whether this code fragment has full position significance. */ public boolean hasFullPositionSignificance() { return positionSignificance == PositionSignificance.FULL; } /** - * Mark this code snippet as having full position significance. 
+ * Mark this code fragment as having full position significance. */ public void markFullPositionSignificance() { positionSignificance = PositionSignificance.FULL; } /** - * @return an unmodifiable set of the variables which were (potentially) read from in this code snippet. + * @return an unmodifiable set of the variables which were (potentially) read from in this code fragment. */ public Set reads() { return Collections.unmodifiableSet(reads); } /** - * @return an unmodifiable set of the variables which were (potentially) written to in this code snippet. + * @return an unmodifiable set of the variables which were (potentially) written to in this code fragment. */ public Set writes() { return Collections.unmodifiableSet(writes); } /** - * Add a variable to the set of variables which were (potentially) read from in this code snippet. + * Add a variable to the set of variables which were (potentially) read from in this code fragment. * @param variable The variable which is added. */ public void addRead(Variable variable) { @@ -156,7 +156,7 @@ public void addRead(Variable variable) { } /** - * Add a variable to the set of variables which were (potentially) written to in this code snippet. + * Add a variable to the set of variables which were (potentially) written to in this code fragment. * @param variable The variable which is added. 
*/ public void addWrite(Variable variable) { @@ -182,7 +182,7 @@ public static CodeSemantics join(List semanticsList) { Set reads = new HashSet<>(); Set writes = new HashSet<>(); for (CodeSemantics semantics : semanticsList) { - keep = keep || semantics.keep; + keep = keep || semantics.critical; if (semantics.positionSignificance.compareTo(positionSignificance) > 0) { positionSignificance = semantics.positionSignificance; } @@ -196,7 +196,7 @@ public static CodeSemantics join(List semanticsList) { @Override public String toString() { List properties = new LinkedList<>(); - if (keep) { + if (critical) { properties.add("keep"); } if (positionSignificance != PositionSignificance.NONE) { From a88e538624dbcaff740bcff6e180995b59f57ce6 Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Wed, 17 Jul 2024 16:50:45 +0200 Subject: [PATCH 6/8] Make fields required for construction transient to make sonar happy. --- .../de/jplag/normalization/NormalizationGraph.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 1eba1eefe4..e07c873b50 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -23,13 +23,13 @@ public class NormalizationGraph extends SimpleDirectedGraph fullPositionSignificanceIncoming; - private Statement lastFullPositionSignificance; - private Statement lastPartialPositionSignificance; - private final Map> variableReads; - private final Map> variableWrites; - private final Set inCurrentBidirectionalBlock; - private Statement current; + private final transient Collection fullPositionSignificanceIncoming; + private transient Statement lastFullPositionSignificance; + private transient Statement lastPartialPositionSignificance; + private final transient Map> variableReads; + private final transient Map> 
variableWrites; + private final transient Set inCurrentBidirectionalBlock; + private transient Statement current; /** * Creates a new normalization graph. From 5ccddb4be495a3db0790dca50abb6a84a8e0b4e3 Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 30 Jul 2024 17:45:20 +0200 Subject: [PATCH 7/8] Revert disabling of topological sorting. --- core/src/main/java/de/jplag/JPlag.java | 3 +- core/src/main/java/de/jplag/Submission.java | 5 ++- .../src/main/java/de/jplag/SubmissionSet.java | 7 ++-- .../TokenSequenceNormalizer.java | 19 ++-------- .../test/java/de/jplag/NormalizationTest.java | 36 ++++++------------- 5 files changed, 19 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java index 573333ebd4..41f1c08c8e 100644 --- a/core/src/main/java/de/jplag/JPlag.java +++ b/core/src/main/java/de/jplag/JPlag.java @@ -72,8 +72,7 @@ public static JPlagResult run(JPlagOptions options) throws ExitException { SubmissionSetBuilder builder = new SubmissionSetBuilder(options); SubmissionSet submissionSet = builder.buildSubmissionSet(); if (options.normalize() && options.language().supportsNormalization() && options.language().requiresCoreNormalization()) { - boolean normalizeOrder = !options.mergingOptions().enabled(); // match merging conflicts with sorting - submissionSet.normalizeSubmissions(normalizeOrder); + submissionSet.normalizeSubmissions(); } int submissionCount = submissionSet.numberOfSubmissions(); if (submissionCount < 2) { diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 39df988662..5610a19873 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -256,11 +256,10 @@ private static File createErrorDirectory(String... 
subdirectoryNames) { /** * Perform token sequence normalization, which makes the token sequence invariant to dead code insertion and independent * statement reordering. - * @param sorting determines whether to perform topological sorting during normalization. */ - void normalize(boolean sorting) { + void normalize() { List originalOrder = getOrder(tokenList); - tokenList = TokenSequenceNormalizer.normalize(tokenList, sorting); + tokenList = TokenSequenceNormalizer.normalize(tokenList); List normalizedOrder = getOrder(tokenList); logger.debug("original line order: {}", originalOrder); diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index 8153dd0394..65e0474950 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -103,13 +103,12 @@ public List getInvalidSubmissions() { * Normalizes the token sequences of all submissions (including basecode). This makes the token sequence invariant to * dead code insertion and independent statement reordering by removing dead tokens and optionally reordering tokens to * a deterministic order. - * @param sorting determines whether to perform topological sorting during normalization. 
*/ - public void normalizeSubmissions(boolean sorting) { + public void normalizeSubmissions() { if (baseCodeSubmission != null) { - baseCodeSubmission.normalize(sorting); + baseCodeSubmission.normalize(); } - ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, submission -> submission.normalize(sorting)); + ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, submission -> submission.normalize()); } private List filterValidSubmissions() { diff --git a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index 00b2aee34c..9a1256300e 100644 --- a/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -26,16 +26,12 @@ private TokenSequenceNormalizer() { * Normalization Graph and then turning it back into a token sequence. For more information refer to the * corresponding paper * @param tokens The original token sequence, remains unaltered. - * @param sorting Boolean flag to control if the tokens should be topologically sorted. * @return The normalized token sequence. 
*/ - public static List normalize(List tokens, boolean sorting) { + public static List normalize(List tokens) { NormalizationGraph graph = new NormalizationGraph(tokens); propagateCriticalityStatus(graph); - if (sorting) { - return normalizeWithSorting(tokens, graph); - } - return normalizeWithoutSorting(tokens, graph); + return normalizeWithSorting(tokens, graph); } // Add tokens in normalized original order, removing dead tokens @@ -63,17 +59,6 @@ private static List normalizeWithSorting(List tokens, Normalizatio return normalizedTokens; } - // Add tokens in the original order, removing dead tokens - private static List normalizeWithoutSorting(List tokens, NormalizationGraph normalizationGraph) { - List normalizedTokens = new ArrayList<>(tokens.size()); - for (Statement statement : normalizationGraph.vertexSet()) { - if (statement.semantics().isCritical()) { - normalizedTokens.addAll(statement.tokens()); - } - } - return normalizedTokens; - } - /** * Spread criticality status to every node that does not represent dead code. Nodes without keep criticality are later * eliminated (dead nodes). 
Before calling this method, only the statements that directly affect the behavior are marked diff --git a/core/src/test/java/de/jplag/NormalizationTest.java b/core/src/test/java/de/jplag/NormalizationTest.java index 3de5dcb480..f1ba200194 100644 --- a/core/src/test/java/de/jplag/NormalizationTest.java +++ b/core/src/test/java/de/jplag/NormalizationTest.java @@ -6,51 +6,37 @@ import java.util.stream.Collectors; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; import de.jplag.exceptions.ExitException; import de.jplag.options.JPlagOptions; class NormalizationTest extends TestBase { - private Map> tokenStringMap; - private List originalTokenString; - private SubmissionSet submissionSet; + private final Map> tokenStringMap; + private final List originalTokenString; - @BeforeEach - void setUp() throws ExitException { + NormalizationTest() throws ExitException { JPlagOptions options = getDefaultOptions("normalization"); SubmissionSetBuilder builder = new SubmissionSetBuilder(options); - submissionSet = builder.buildSubmissionSet(); - } - - // normalize submission set and initialize fields - private void normalizeSubmissions(boolean sorting) { - submissionSet.normalizeSubmissions(sorting); + SubmissionSet submissionSet = builder.buildSubmissionSet(); + submissionSet.normalizeSubmissions(); Function> getTokenString = submission -> submission.getTokenList().stream().map(Token::getType).toList(); tokenStringMap = submissionSet.getSubmissions().stream().collect(Collectors.toMap(Submission::getName, getTokenString)); originalTokenString = tokenStringMap.get("Squares.java"); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testInsertionNormalization(boolean sorting) { - normalizeSubmissions(sorting); + @Test + void testInsertionNormalization() { 
Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInserted.java")); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testReorderingNormalization(boolean sorting) { - normalizeSubmissions(sorting); - Assertions.assertEquals(sorting, originalTokenString.equals(tokenStringMap.get("SquaresReordered.java"))); + @Test + void testReorderingNormalization() { + Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresReordered.java")); } @Test void testInsertionReorderingNormalization() { - normalizeSubmissions(true); Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInsertedReordered.java")); } -} +} \ No newline at end of file From 86af3f88072b5a5ddf48b9329de1e01ee7d9648b Mon Sep 17 00:00:00 2001 From: Timur Saglam Date: Tue, 30 Jul 2024 17:49:58 +0200 Subject: [PATCH 8/8] Revert to more compact method reference syntax. --- core/src/main/java/de/jplag/SubmissionSet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index 65e0474950..884d800450 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -108,7 +108,7 @@ public void normalizeSubmissions() { if (baseCodeSubmission != null) { baseCodeSubmission.normalize(); } - ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, submission -> submission.normalize()); + ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, Submission::normalize); } private List filterValidSubmissions() {