diff --git a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDetectionResult.java b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDetectionResult.java
new file mode 100644
index 00000000..2905c551
--- /dev/null
+++ b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDetectionResult.java
@@ -0,0 +1,73 @@
+package de.unibremen.informatik.st.libvcs4j.pmd;
+
+import de.unibremen.informatik.st.libvcs4j.VCSFile;
+import lombok.NonNull;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * The detection result of {@link CPDRunner}.
+ */
+public class CPDDetectionResult {
+
+    /**
+     * The detected violations.
+     */
+    private final List<CPDDuplication> violations;
+
+    /**
+     * Creates a new instance with given violations.
+     *
+     * @param violations
+     *      The violations to store (flat copied, {@code null} values are
+     *      filtered out).
+     * @throws NullPointerException
+     *      If {@code violations} is {@code null}.
+     */
+    public CPDDetectionResult(@NonNull List<CPDDuplication> violations) {
+        this.violations = violations.stream()
+                .filter(Objects::nonNull)
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Returns the violations.
+     *
+     * @return
+     *      A copy of the internal list.
+     */
+    public List<CPDDuplication> getViolations() {
+        return new ArrayList<>(violations);
+    }
+
+    /**
+     * Returns all violations detected in {@code file}. Returns an empty list
+     * if {@code file} is {@code null} or was not analyzed.
+     *
+     * @param file
+     *      The requested file.
+     * @return
+     *      All violations detected in {@code file}.
+     */
+    public List<CPDDuplication> violationsOf(final VCSFile file) {
+        final List<CPDDuplication> violationsOfFile = new ArrayList<>();
+        if (file == null) {
+            return violationsOfFile;
+        }
+        for (final CPDDuplication v : violations) {
+            for (final VCSFile.Range range : v.getRanges()) {
+                // Compare the paths by value; `==` would compare object
+                // identity and miss equal paths held by distinct instances.
+                if (Objects.equals(range.getFile().getRelativePath(),
+                        file.getRelativePath())) {
+                    violationsOfFile.add(v);
+                    break;
+                }
+            }
+        }
+        return violationsOfFile;
+    }
+}
diff --git a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDuplication.java b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDuplication.java
new file mode 100644
index 00000000..bad861a2
--- /dev/null
+++ b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDDuplication.java
@@ -0,0 +1,55 @@
+package de.unibremen.informatik.st.libvcs4j.pmd;
+
+import de.unibremen.informatik.st.libvcs4j.VCSFile;
+import de.unibremen.informatik.st.libvcs4j.mapping.Mappable;
+import lombok.NonNull;
+import lombok.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A readonly representation of a violation detected by CPD.
+ */
+@Value
+public final class CPDDuplication implements Mappable<String> {
+
+    /**
+     * The ranges of this violation.
+     */
+    @NonNull
+    private final List<VCSFile.Range> ranges;
+
+    /**
+     * Amount of lines that are duplicated.
+     */
+    private final int lines;
+
+    /**
+     * Amount of tokens that the duplication shares.
+     */
+    private final int tokens;
+
+    /**
+     * Returns the ranges of this violation.
+     *
+     * @return
+     *      A copy of the internal list.
+     */
+    @Override
+    public List<VCSFile.Range> getRanges() {
+        return new ArrayList<>(ranges);
+    }
+
+    /**
+     * Returns the metadata of this violation.
+     *
+     * @return
+     *      Always the constant {@code "cpd-clone"}.
+     */
+    @Override
+    public Optional<String> getMetadata() {
+        return Optional.of("cpd-clone");
+    }
+}
diff --git a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDRunner.java b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDRunner.java
new file mode 100644
index 00000000..4ba01ae2
--- /dev/null
+++ b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDRunner.java
@@ -0,0 +1,118 @@
+package de.unibremen.informatik.st.libvcs4j.pmd;
+
+import de.unibremen.informatik.st.libvcs4j.Revision;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.Setter;
+import lombok.extern.slf4j.Slf4j;
+import net.sourceforge.pmd.cpd.CPD;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Allows to configure and run CPD on {@link Revision} instances.
+ */
+@Slf4j
+public class CPDRunner {
+
+    /**
+     * Minimum number of tokens that have to match for a duplicate to be
+     * reported.
+     */
+    @Getter
+    private final int minimumTokens;
+
+    /**
+     * Creates a new CPD runner.
+     *
+     * @param pMinimumTokens
+     *      The minimum number of matching tokens (passed to CPD as
+     *      {@code --minimum-tokens}).
+     */
+    public CPDRunner(final int pMinimumTokens) {
+        minimumTokens = pMinimumTokens;
+    }
+
+    /**
+     * Analyzes the given revision.
+     *
+     * @param revision
+     *      The revision to analyze.
+     * @return
+     *      The detection result.
+     * @throws NullPointerException
+     *      If {@code revision} is {@code null}.
+     * @throws IOException
+     *      If an error occurred while analyzing {@code revision}.
+     */
+    public CPDDetectionResult analyze(@NonNull final Revision revision)
+            throws IOException {
+
+        final List<String> args = new ArrayList<>();
+        // language
+        args.add("--language");
+        args.add("java");
+        // tokens
+        args.add("--minimum-tokens");
+        args.add(String.valueOf(minimumTokens));
+        // input
+        args.add("--files");
+        args.add(revision.getOutput().toString());
+        // format
+        args.add("--format");
+        args.add("xml");
+        // encoding
+        args.add("--encoding");
+        args.add("utf-8");
+        // Skip files that can't be tokenized instead of throwing Exceptions
+        args.add("--skip-lexical-errors");
+        // Ignore Identifiers, Literals and Annotations to detect Type 2 Clones
+        args.add("--ignore-identifiers");
+        args.add("--ignore-literals");
+        args.add("--ignore-annotations");
+
+        // Temporarily redirect stdout to an in-memory buffer. The stream
+        // encodes text as UTF-8 (instead of the platform default) so that it
+        // matches the charset used to decode the buffer below.
+        final PrintStream stdout = System.out;
+        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        final PrintStream ps = new PrintStream(
+                bos, true, StandardCharsets.UTF_8.name());
+        System.setOut(ps);
+        try {
+            System.setProperty("net.sourceforge.pmd.cli.noExit","true");
+            // toArray(T[]) works on every Java version, whereas
+            // toArray(IntFunction) requires Java 11.
+            CPD.main(args.toArray(new String[0]));
+            ps.flush();
+            // According to PMD the resulting xml is UTF-8 encoded.
+            final String output = bos.toString(StandardCharsets.UTF_8.name());
+
+            // Parse output
+            final SAXParserFactory factory = SAXParserFactory.newInstance();
+            final SAXParser saxParser = factory.newSAXParser();
+            final InputStream bis = new ByteArrayInputStream(
+                    output.getBytes(StandardCharsets.UTF_8.name()));
+            final CPDSaxHandler handler =
+                    new CPDSaxHandler(revision.getFiles());
+            saxParser.parse(bis, handler);
+
+            // Result
+            return new CPDDetectionResult(handler.getViolations());
+        } catch (final UnsupportedOperationException | SAXException
+                | ParserConfigurationException e) {
+            throw new IOException(e);
+        } finally {
+            // Make sure to reset stdout.
+            System.setOut(stdout);
+        }
+    }
+}
diff --git a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDSaxHandler.java b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDSaxHandler.java
new file mode 100644
index 00000000..94ef9095
--- /dev/null
+++ b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/CPDSaxHandler.java
@@ -0,0 +1,240 @@
+package de.unibremen.informatik.st.libvcs4j.pmd;
+
+import de.unibremen.informatik.st.libvcs4j.VCSFile;
+import lombok.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static java.lang.Integer.parseInt;
+
+/**
+ * Handles the XML output of CPD and stores the result in {@link #violations}.
+ */
+class CPDSaxHandler extends DefaultHandler {
+
+    /**
+     * The tab size (see {@link VCSFile.Position#tabSize}) which is used to
+     * create a position.
+     */
+    private static final int TAB_SIZE = 8;
+
+    /**
+     * The logger of this class.
+     */
+    private static final Logger log =
+            LoggerFactory.getLogger(CPDSaxHandler.class);
+
+    /**
+     * The files to process.
+     */
+    private final Collection<VCSFile> files;
+
+    /**
+     * Used to map paths detected by CPD to {@link VCSFile} instances.
+     */
+    private final Map<String, VCSFile> path2File = new HashMap<>();
+
+    /**
+     * Stores the detected violations.
+     */
+    private final List<CPDDuplication> violations = new ArrayList<>();
+
+    /**
+     * The amount of lines that are duplicated ({@code null} if missing).
+     */
+    private String lines;
+
+    /**
+     * The amount of tokens that are duplicated ({@code null} if missing).
+     */
+    private String tokens;
+
+    /**
+     * The ranges of the duplication that is currently being parsed.
+     */
+    private List<VCSFile.Range> ranges = new ArrayList<>();
+
+    /**
+     * Creates a new handler which uses the given collection of
+     * {@link VCSFile}s to link violations in files detected by CPD to their
+     * corresponding {@link VCSFile} instance. {@code null} values in
+     * {@code files} are filtered out.
+     *
+     * @param files
+     *      The files to link violations against.
+     * @throws NullPointerException
+     *      If {@code files} is {@code null}.
+     */
+    public CPDSaxHandler(@NonNull final Collection<VCSFile> files)
+            throws NullPointerException, IllegalArgumentException {
+        this.files = files.stream()
+                .filter(Objects::nonNull)
+                .collect(Collectors.toList());
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+        violations.clear();
+        path2File.clear();
+        ranges.clear();
+        lines = null;
+        tokens = null;
+        for (VCSFile f : files) {
+            try {
+                path2File.put(f.toFile().getCanonicalPath(), f);
+            } catch (final IOException e) {
+                log.warn("Unable to get canonical path of file '{}'. " +
+                        "Falling back to regular path.", f.getPath());
+                path2File.put(f.getPath(), f);
+            }
+        }
+        super.startDocument();
+    }
+
+    @Override
+    public void startElement(
+            final String uri, final String localName, final String qName,
+            final Attributes attributes) throws SAXException {
+        if (qName.equals("duplication")) {
+            // Start from a clean state so that no value of a previous
+            // duplication leaks into this one.
+            ranges = new ArrayList<>();
+            lines = attributes.getValue("lines");
+            tokens = attributes.getValue("tokens");
+            if (lines == null) {
+                log.warn("Skipping violation due to missing 'lines' attribute");
+                return;
+            }
+            if (tokens == null) {
+                log.warn("Skipping violation due to missing 'tokens' attribute");
+                return;
+            }
+        } else if (qName.equals("file")) {
+            final String path = attributes.getValue("path");
+            if (path == null) {
+                log.warn("Skipping violation due to missing 'path' attribute");
+                return;
+            }
+            final VCSFile file = path2File.get(path);
+            if (file == null) {
+                log.warn("Skipping violation due to missing file mapping ({})",
+                        path);
+                return;
+            }
+
+            final String bls = attributes.getValue("line");
+            if (bls == null) {
+                log.warn("Skipping violation due to missing 'line' attribute");
+                return;
+            }
+            final String els = attributes.getValue("endline");
+            if (els == null) {
+                log.warn("Skipping violation due to missing 'endline' attribute");
+                return;
+            }
+            final String bcs = attributes.getValue("column");
+            if (bcs == null) {
+                log.warn("Skipping violation due to missing 'column' attribute");
+                return;
+            }
+            final String ecs = attributes.getValue("endcolumn");
+            if (ecs == null) {
+                log.warn("Skipping violation due to missing 'endcolumn' attribute");
+                return;
+            }
+            try {
+                final int bl = parseInt(bls);
+                final int bc = parseInt(bcs);
+                final Optional<VCSFile.Position> begin =
+                        file.positionOf(bl, bc, TAB_SIZE);
+                if (!begin.isPresent()) {
+                    log.warn("Skipping violation due to not existing begin position. " +
+                            "file: {}, line: {}, column: {}, tab size: {}",
+                            file.getPath(), bl, bc, TAB_SIZE);
+                    return;
+                }
+                final int el = parseInt(els);
+                final int ec = parseInt(ecs);
+                final Optional<VCSFile.Position> end =
+                        file.positionOf(el, ec, TAB_SIZE);
+                if (!end.isPresent()) {
+                    log.warn("Skipping violation due to not existing end position. " +
+                            "file: {}, line: {}, column: {}, tab size: {}",
+                            file.getPath(), el, ec, TAB_SIZE);
+                    return;
+                }
+
+                ranges.add(new VCSFile.Range(begin.get(), end.get()));
+            } catch (final NumberFormatException e) {
+                log.warn("Skipping violation due to a malformed position attribute");
+            } catch (final IOException e) {
+                log.warn("Skipping violation due to an IO error while creating its range");
+            }
+        }
+
+        super.startElement(uri, localName, qName, attributes);
+    }
+
+    @Override
+    public void endElement(final String uri, final String localName,
+            final String qName) throws SAXException {
+        // Compare by value; `==` would only compare references.
+        if ("duplication".equals(qName)) {
+            storeDuplication();
+        }
+        super.endElement(uri, localName, qName);
+    }
+
+    /**
+     * Stores the duplication that has just been closed in {@link #violations},
+     * unless it is incomplete, and resets the per-duplication state.
+     */
+    private void storeDuplication() {
+        final List<VCSFile.Range> collected = ranges;
+        final String lineCount = lines;
+        final String tokenCount = tokens;
+        // Initialize the state for the next duplication.
+        ranges = new ArrayList<>();
+        lines = null;
+        tokens = null;
+
+        if (lineCount == null || tokenCount == null) {
+            // The missing attribute was already reported in startElement.
+            return;
+        }
+        if (collected.size() < 2) {
+            log.warn("Skipping violation because there is too few ranges");
+            return;
+        }
+        try {
+            violations.add(new CPDDuplication(collected,
+                    parseInt(lineCount), parseInt(tokenCount)));
+        } catch (final NumberFormatException e) {
+            log.warn("Skipping violation due to a malformed 'lines' or "
+                    + "'tokens' attribute");
+        }
+    }
+
+    /**
+     * Returns a copy of the detected violations.
+     *
+     * @return
+     *      A copy of the detected violations.
+     */
+    public List<CPDDuplication> getViolations() {
+        return new ArrayList<>(violations);
+    }
+}
diff --git a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunner.java b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunner.java
index 7aaacee8..734a506f 100644
--- a/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunner.java
+++ b/libvcs4j-tools/src/main/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunner.java
@@ -112,7 +112,7 @@ public PMDRunner(@NonNull final String... rules)
      * @throws IOException
      *      If an error occurred while analyzing {@code revision}.
      */
-    protected PMDDetectionResult analyze(@NonNull final Revision revision)
+    public PMDDetectionResult analyze(@NonNull final Revision revision)
             throws IOException {
         Validate.validateState(!rules.isEmpty());
 
diff --git a/libvcs4j-tools/src/test/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunnerTest.java b/libvcs4j-tools/src/test/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunnerTest.java
index 5ca39848..632029cf 100644
--- a/libvcs4j-tools/src/test/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunnerTest.java
+++ b/libvcs4j-tools/src/test/java/de/unibremen/informatik/st/libvcs4j/pmd/PMDRunnerTest.java
@@ -1,54 +1,66 @@
-package de.unibremen.informatik.st.libvcs4j.pmd;
-
-import de.unibremen.informatik.st.libvcs4j.VCSFile;
-import de.unibremen.informatik.st.libvcs4j.spoon.codesmell.RevisionMock;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-import java.io.IOException;
-import java.nio.file.Paths;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-public class PMDRunnerTest {
-
-    @Rule
-    public TemporaryFolder folder = new TemporaryFolder();
-
-    @Test
-    public void emptyIfBlocks() throws IOException {
-        RevisionMock revision = new RevisionMock(folder);
-        revision.addFile(Paths.get("godclass", "GodClass.java"));
-        VCSFile file = revision.getFiles().get(0);
-
-        PMDRunner pmdRunner = new PMDRunner(
-                "category/java/errorprone.xml/EmptyIfStmt");
-        PMDDetectionResult result = pmdRunner.analyze(revision);
-        assertThat(result.violationsOf(file)).hasSize(2);
-
-        PMDViolation v1 = result.violationsOf(file).get(0);
-        assertThat(v1.getRange().getBegin().getLine()).isEqualTo(29);
-        assertThat(v1.getRange().getBegin().getColumn()).isEqualTo(40);
-        assertThat(v1.getRange().getEnd().getLine()).isEqualTo(29);
-        assertThat(v1.getRange().getEnd().getColumn()).isEqualTo(41);
-        assertThat(v1.getRange().readContent()).isEqualTo("{}");
-
-        PMDViolation v2 = result.violationsOf(file).get(1);
-        assertThat(v2.getRange().getBegin().getLine()).isEqualTo(33);
-        assertThat(v2.getRange().getBegin().getColumn()).isEqualTo(53);
-        assertThat(v2.getRange().getEnd().getLine()).isEqualTo(33);
-        assertThat(v2.getRange().getEnd().getColumn()).isEqualTo(54);
-        assertThat(v2.getRange().readContent()).isEqualTo("{}");
-    }
-
-    @Test
-    public void defaultCategories() throws IOException {
-        RevisionMock revision = new RevisionMock(folder);
-        revision.addFile(Paths.get("godclass", "GodClass.java"));
-
-        PMDRunner pmdRunner = new PMDRunner();
-        PMDDetectionResult result = pmdRunner.analyze(revision);
-        assertThat(result).isNotNull();
-    }
-}
+package de.unibremen.informatik.st.libvcs4j.pmd;
+
+import de.unibremen.informatik.st.libvcs4j.VCSFile;
+import de.unibremen.informatik.st.libvcs4j.spoon.codesmell.RevisionMock;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class PMDRunnerTest {
+
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder();
+
+    @Test
+    public void emptyIfBlocks() throws IOException {
+        RevisionMock revision = new RevisionMock(folder);
+        revision.addFile(Paths.get("godclass", "GodClass.java"));
+        VCSFile file = revision.getFiles().get(0);
+
+        PMDRunner pmdRunner = new PMDRunner(
+                "category/java/errorprone.xml/EmptyIfStmt");
+        PMDDetectionResult result = pmdRunner.analyze(revision);
+        assertThat(result.violationsOf(file)).hasSize(2);
+
+        PMDViolation v1 = result.violationsOf(file).get(0);
+        assertThat(v1.getRange().getBegin().getLine()).isEqualTo(29);
+        assertThat(v1.getRange().getBegin().getColumn()).isEqualTo(40);
+        assertThat(v1.getRange().getEnd().getLine()).isEqualTo(29);
+        assertThat(v1.getRange().getEnd().getColumn()).isEqualTo(41);
+        assertThat(v1.getRange().readContent()).isEqualTo("{}");
+
+        PMDViolation v2 = result.violationsOf(file).get(1);
+        assertThat(v2.getRange().getBegin().getLine()).isEqualTo(33);
+        assertThat(v2.getRange().getBegin().getColumn()).isEqualTo(53);
+        assertThat(v2.getRange().getEnd().getLine()).isEqualTo(33);
+        assertThat(v2.getRange().getEnd().getColumn()).isEqualTo(54);
+        assertThat(v2.getRange().readContent()).isEqualTo("{}");
+    }
+
+    @Test
+    public void defaultCategories() throws IOException {
+        RevisionMock revision = new RevisionMock(folder);
+        revision.addFile(Paths.get("godclass", "GodClass.java"));
+
+        PMDRunner pmdRunner = new PMDRunner();
+        PMDDetectionResult result = pmdRunner.analyze(revision);
+        assertThat(result).isNotNull();
+    }
+
+    @Test
+    public void CPDTest() throws IOException {
+        RevisionMock revision = new RevisionMock(folder);
+        revision.addFile(Paths.get("cpdtest","Test1.java"));
+        revision.addFile(Paths.get("cpdtest","Test2.java"));
+
+        CPDRunner cpdRunner = new CPDRunner(100);
+        CPDDetectionResult result = cpdRunner.analyze(revision);
+        assertThat(result).isNotNull();
+    }
+}
\ No newline at end of file