diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..140a0f9c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,13 @@ +/COPYING text +/src/test/resources/org/owasp/html/* text eol=lf +.gitattributes text +.gitignore text +*.html text +*.java text +*.js text +*.json text +*.md text +*.sh text +*.txt text +*.xml text +*.yml text diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 00000000..0487ce5c --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,43 @@ +# This workflow will build a Java project with Maven, and cache/restore any dependencies to improve the workflow execution time +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-java-with-maven + +name: Java CI with Maven + +on: + push: + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/maven.yml' + pull_request: + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/maven.yml' + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + java: [ '11', '17', '21' ] + + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.Java }} + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.Java }} + distribution: 'temurin' + cache: maven + - name: Build with Maven + run: mvn --batch-mode --errors --fail-at-end --show-version --update-snapshots verify + - name: Store artifact + if: ${{ matrix.Java == 11 }} + uses: actions/upload-artifact@v4 + with: + name: jar-jdk${{ matrix.Java }} + path: target/*.jar + retention-days: 7 diff --git a/README.md b/README.md index e6249953..ec9591d6 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A fast and easy to configure HTML Sanitizer written in Java which lets you include HTML authored by third-parties in your web application while protecting against XSS. -The existing dependencies are on guava and JSR 305. The other jars +The existing dependency is on JSR 305. The other jars are only needed by the test suite. The JSR 305 dependency is a compile-only dependency, only needed for annotations. diff --git a/RELEASE-checklist.sh b/RELEASE-checklist.sh index 57e611ba..4f9ba18d 100644 --- a/RELEASE-checklist.sh +++ b/RELEASE-checklist.sh @@ -8,7 +8,6 @@ set -e # Make sure the build is ok via -mvn -Dguava.version=27.0-jre -f aggregate clean verify javadoc:jar source:jar mvn -f aggregate clean verify jacoco:report site javadoc:jar source:jar mvn install mvn org.sonatype.ossindex.maven:ossindex-maven-plugin:audit -f aggregate diff --git a/docs/getting_started.md b/docs/getting_started.md index 44de7fe8..fdb5addf 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -4,12 +4,11 @@ If you are using Maven then follow the [maven](maven.md) directions to add a dependency. Otherwise, -[download prebuilt jars](https://search.maven.org/#artifactdetails%7Ccom.googlecode.owasp-java-html-sanitizer%7Cowasp-java-html-sanitizer%7C20180219.1%7Cjar) +[download prebuilt jars](https://search.maven.org/artifact/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/) or `git clone git@github.com:OWASP/java-html-sanitizer.git` and build the latest source. -Unless maven is managing your CLASSPATH for you, you need to add both `owasp-java-html-sanitizer.jar` and the -Guava JAR. +Unless maven is managing your CLASSPATH for you, you need to add `owasp-java-html-sanitizer.jar`. Once you have your CLASSPATH set up correctly with the relevant JARs you should be able to add diff --git a/parent/pom.xml b/parent/pom.xml index ceaac711..968711c2 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -257,7 +257,7 @@ application while protecting against XSS. com.google.protobuf protobuf-java - 3.9.1 + 3.16.3 provided diff --git a/src/main/java/org/owasp/html/Encoding.java b/src/main/java/org/owasp/html/Encoding.java index 20dbed99..7ffc7e1e 100644 --- a/src/main/java/org/owasp/html/Encoding.java +++ b/src/main/java/org/owasp/html/Encoding.java @@ -43,6 +43,7 @@ public final class Encoding { * @return text/plain * @deprecated specify whether s is in an attribute value */ + @Deprecated public static String decodeHtml(String s) { return decodeHtml(s, false); } diff --git a/src/main/java/org/owasp/html/HtmlEntities.java b/src/main/java/org/owasp/html/HtmlEntities.java index 8b410ea9..6ba51a48 100644 --- a/src/main/java/org/owasp/html/HtmlEntities.java +++ b/src/main/java/org/owasp/html/HtmlEntities.java @@ -2309,6 +2309,7 @@ final class HtmlEntities { * @return The offset after the end of the decoded sequence in {@code html}. * @deprecated specify whether html is in an attribute value. */ + @Deprecated public static int appendDecodedEntity( String html, int offset, int limit, StringBuilder sb) { return appendDecodedEntity(html, offset, limit, false, sb); diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java index d81c53d2..1a9976af 100644 --- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java +++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java @@ -33,7 +33,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -966,12 +965,11 @@ public AttributeBuilder matching( */ @SuppressWarnings("synthetic-access") public HtmlPolicyBuilder globally() { - if(attributeNames.get(0).equals("style")) { - return allowStyling(); - } else { - return HtmlPolicyBuilder.this.allowAttributesGlobally( - policy, attributeNames); + if (attributeNames.contains("style")) { + allowStyling(); } + return HtmlPolicyBuilder.this.allowAttributesGlobally( + policy, attributeNames); } /** @@ -1040,6 +1038,7 @@ public String apply(String elementName, List attrs) { relValue = DEFAULT_RELS_ON_TARGETTED_LINKS_STR; } else { StringBuilder sb = new StringBuilder(); + Set present = new HashSet(); if (relIndex >= 0) { // Preserve values that are not explicitly skipped. String rels = attrs.get(relIndex); @@ -1050,7 +1049,9 @@ public String apply(String elementName, List attrs) { if (skip.isEmpty() || !skip.contains( Strings.toLowerCase(rels.substring(left, i)))) { - sb.append(rels, left, i).append(' '); + String rel = rels.substring(left, i); + present.add(rel); + sb.append(rel).append(' '); } } left = i + 1; @@ -1058,17 +1059,24 @@ public String apply(String elementName, List attrs) { } } for (String s : extra) { - sb.append(s).append(' '); + if (!present.contains(s)) { + sb.append(s).append(' '); + present.add(s); + } } if (hasTarget) { for (String s : whenTargetPresent) { - sb.append(s).append(' '); + if (!present.contains(s)) { + sb.append(s).append(' '); + present.add(s); + } } } int sblen = sb.length(); if (sblen == 0) { relValue = ""; } else { + // Trim last space. relValue = sb.substring(0, sb.length() - 1); } } diff --git a/src/main/java/org/owasp/html/HtmlStreamRenderer.java b/src/main/java/org/owasp/html/HtmlStreamRenderer.java index bb14e3ee..b81283aa 100644 --- a/src/main/java/org/owasp/html/HtmlStreamRenderer.java +++ b/src/main/java/org/owasp/html/HtmlStreamRenderer.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.Set; import javax.annotation.WillCloseWhenClosed; import javax.annotation.concurrent.NotThreadSafe; @@ -57,6 +58,8 @@ public class HtmlStreamRenderer implements HtmlStreamEventReceiver { private StringBuilder pendingUnescaped; private HtmlTextEscapingMode escapingMode = HtmlTextEscapingMode.PCDATA; private boolean open; + /** The count of {@link #foreignContentRootElementNames} opened and not subsequently closed. */ + private int foreignContentDepth = 0; /** * Factory. @@ -168,7 +171,25 @@ private void writeOpenTag( return; } - escapingMode = HtmlTextEscapingMode.getModeForTag(elementName); + if (foreignContentRootElementNames.contains(elementName)) { + foreignContentDepth += 1; + } + + HtmlTextEscapingMode tentativeEscapingMode = HtmlTextEscapingMode.getModeForTag(elementName); + if (foreignContentDepth == 0) { + escapingMode = tentativeEscapingMode; + } else { + switch (tentativeEscapingMode) { + case PCDATA: + case VOID: + escapingMode = tentativeEscapingMode; + break; + default: // escape special characters but do not allow tags + escapingMode = HtmlTextEscapingMode.RCDATA; + break; + } + } + switch (escapingMode) { case CDATA_SOMETIMES: @@ -240,6 +261,10 @@ private final void writeCloseTag(String uncanonElementName) return; } + if (foreignContentDepth != 0 && foreignContentRootElementNames.contains(elementName)) { + foreignContentDepth -= 1; + } + if (pendingUnescaped != null) { if (!lastTagOpened.equals(elementName)) { error("Tag content cannot appear inside CDATA element", elementName); @@ -436,4 +461,6 @@ public void close() throws IOException { private static boolean isTagEnd(char ch) { return ch < 63 && 0 != (TAG_ENDS & (1L << ch)); } + + private static final Set foreignContentRootElementNames = Set.of("svg", "math"); } diff --git a/src/main/java/org/owasp/html/AllExamples.java b/src/test/java/org/owasp/html/AllExamples.java similarity index 100% rename from src/main/java/org/owasp/html/AllExamples.java rename to src/test/java/org/owasp/html/AllExamples.java diff --git a/src/test/java/org/owasp/html/Benchmark.java b/src/test/java/org/owasp/html/Benchmark.java index 2a937fc6..e4a917ad 100644 --- a/src/test/java/org/owasp/html/Benchmark.java +++ b/src/test/java/org/owasp/html/Benchmark.java @@ -58,7 +58,7 @@ public class Benchmark { * specifies a benchmark to run and unspecified ones are not run. */ public static void main(String[] args) throws Exception { - String html = Files.readString(new File(args[0]).toPath(), StandardCharsets.UTF_8); + String html = new String(Files.readAllBytes(new File(args[0]).toPath()), StandardCharsets.UTF_8); boolean timeLibhtmlparser = true; boolean timeSanitize = true; diff --git a/src/test/java/org/owasp/html/CssSchemaTest.java b/src/test/java/org/owasp/html/CssSchemaTest.java index 91c6a792..e3b8e2f9 100644 --- a/src/test/java/org/owasp/html/CssSchemaTest.java +++ b/src/test/java/org/owasp/html/CssSchemaTest.java @@ -55,6 +55,9 @@ public static final void testDangerousProperties() { // Prefix corner cases. "-", "-moz-", + "-ms-", + "-o-", + "-webkit-", }) { assertSame(key, CssSchema.DISALLOWED, CssSchema.DEFAULT.forKey(key)); } diff --git a/src/test/java/org/owasp/html/HtmlLexerTest.java b/src/test/java/org/owasp/html/HtmlLexerTest.java index d2a680df..2ebf55ea 100644 --- a/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -45,12 +45,12 @@ public class HtmlLexerTest extends TestCase { @Test public final void testHtmlLexer() throws Exception { // Do the lexing. - String input = new String(Files.readString(Paths.get(getClass().getResource("htmllexerinput1.html").toURI()), StandardCharsets.UTF_8)); + String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8); StringBuilder actual = new StringBuilder(); lex(input, actual); // Get the golden. - String golden = new String(Files.readString(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI()), StandardCharsets.UTF_8)); + String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8); // Compare. assertEquals(golden, actual.toString()); diff --git a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java index f19b28d6..746a1017 100644 --- a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java +++ b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java @@ -229,6 +229,48 @@ public static final void testStyleFiltering() { .allowStandardUrlProtocols())); } + @Test + public void testSpecificStyleFilterung() { + assertEquals( + Arrays.stream(new String[] { + "

Header

", + "

Paragraph 1

", + "

Click me out

", + "

", + "

Fancy with soupy tags.", + "

Stylish Para 1

", + "

Stylish Para 2

", + ""}).collect(Collectors.joining("\n")), + apply(new HtmlPolicyBuilder() + .allowCommonInlineFormattingElements() + .allowCommonBlockElements() + .allowStyling(CssSchema.withProperties( + List.of("color", "text-align", "font-size"))) + .allowStandardUrlProtocols())); + } + + @Test + public void testUnionStyleFilterung() { + assertEquals( + Arrays.stream(new String[] { + "

Header

", + "

Paragraph 1

", + "

Click me out

", + "

", + "

Fancy with soupy tags.", + "

Stylish Para 1

", + "

Stylish Para 2

", + ""}).collect(Collectors.joining("\n")), + apply(new HtmlPolicyBuilder() + .allowCommonInlineFormattingElements() + .allowCommonBlockElements() + .allowStyling(CssSchema.withProperties( + List.of("color", "text-align"))) + .allowStyling( // union allowed style properties + CssSchema.withProperties(List.of("font-size"))) + .allowStandardUrlProtocols())); + } + @Test public static final void testElementTransforming() { assertEquals( @@ -289,6 +331,25 @@ public static final void testAllowUrlProtocols() { .allowUrlProtocols("http"))); } + @Test + public static final void testDisallowUrlProtocols() { + assertEquals( + Arrays.stream(new String[] { + "Header", + "Paragraph 1", + "Click me out", + "\"local-canary\"", + "Fancy with soupy tags.", + "Stylish Para 1", + "Stylish Para 2", + ""}).collect(Collectors.joining("\n")), + apply(new HtmlPolicyBuilder() + .allowElements("img") + .allowAttributes("src", "alt").onElements("img") + .allowUrlProtocols("http", "https") + .disallowUrlProtocols("http"))); + } + @Test public static final void testPossibleFalloutFromIssue5() { assertEquals( @@ -801,7 +862,7 @@ public static final void testLinkRelsWhenRelPresent() { } @Test - public static final void testRelLinksWhenRelisPartOfData() { + public final void testRelLinksWhenRelIsPartOfData() { PolicyFactory pf = new HtmlPolicyBuilder() .allowElements("a") .allowAttributes("href").onElements("a") @@ -810,7 +871,7 @@ public static final void testRelLinksWhenRelisPartOfData() { .allowStandardUrlProtocols() .toFactory(); String toSanitize = "test"; - assertTrue("Failure in testRelLinksWhenRelisPartOfData", pf.sanitize(toSanitize).equals(toSanitize)); + assertEquals(toSanitize, pf.sanitize(toSanitize)); } @Test @@ -847,6 +908,52 @@ public static final void testEmptyDefaultLinkRelsSet() { pf.sanitize("eg")); } + @Test + public static final void testRequireAndSkipRels() { + PolicyFactory pf = new HtmlPolicyBuilder() + .allowElements("a") + .allowAttributes("href", "target").onElements("a") + .allowStandardUrlProtocols() + .requireRelsOnLinks("noreferrer") + .skipRelsOnLinks("noopener", "noreferrer") + .toFactory(); + + assertEquals( + "eg", + pf.sanitize("eg")); + + assertEquals( + "eg", + pf.sanitize("eg")); + + assertEquals( + "eg", + pf.sanitize("eg")); + } + + @Test + public static final void testSkipAndRequireRels() { + PolicyFactory pf = new HtmlPolicyBuilder() + .allowElements("a") + .allowAttributes("href", "target").onElements("a") + .allowStandardUrlProtocols() + .skipRelsOnLinks("noopener", "noreferrer") + .requireRelsOnLinks("noreferrer") + .toFactory(); + + assertEquals( + "eg", + pf.sanitize("eg")); + + assertEquals( + "eg", + pf.sanitize("eg")); + + assertEquals( + "eg", + pf.sanitize("eg")); + } + @Test public static final void testExplicitRelsSkip() { PolicyFactory pf = new HtmlPolicyBuilder() @@ -913,6 +1020,64 @@ public static final void testDirLi() { "
  • something
  • ")); } + @Test + public void testDisallowTextIn() { + HtmlPolicyBuilder sharedPolicyBuilder = new HtmlPolicyBuilder() + .allowElements("div") + .allowAttributes("style").onElements("div"); + + PolicyFactory allowPolicy = sharedPolicyBuilder.toFactory(); + assertEquals("
    Some Text
    ", + allowPolicy.sanitize("
    Some Text
    ")); + + PolicyFactory disallowTextPolicy = + sharedPolicyBuilder.disallowTextIn("div").toFactory(); + assertEquals("
    ", + disallowTextPolicy.sanitize( + "
    Some Text
    ")); + } + + @Test + public void testDisallowAttribute() { + HtmlPolicyBuilder sharedPolicyBuilder = new HtmlPolicyBuilder() + .allowElements("div", "p") + .allowAttributes("style").onElements("div", "p"); + + PolicyFactory allowPolicy = sharedPolicyBuilder.toFactory(); + assertEquals( + "

    Some

    Text
    ", + allowPolicy.sanitize( + "

    Some

    Text
    ")); + + PolicyFactory disallowTextPolicy = + sharedPolicyBuilder.disallowAttributes("style").onElements("p").toFactory(); + assertEquals("

    Some

    Text
    ", + disallowTextPolicy.sanitize( + "

    Some

    Text
    ")); + } + + @Test + public void testCreativeCSSStyling() { + PolicyFactory policy = new HtmlPolicyBuilder() + .allowElements("p") + .allowAttributes("style").onElements("p").allowStyling().toFactory(); + + assertEquals("

    Some

    ", + policy.sanitize("

    Some

    ")); + + assertEquals("

    Some

    ", + policy.sanitize("

    Some

    ")); + + assertEquals("

    Some

    ", + policy.sanitize("

    Some

    ")); + + assertEquals("

    Some

    ", + policy.sanitize("

    Some

    ")); + + assertEquals("

    Some

    ", + policy.sanitize("

    Some

    ")); + } + @Test public static void testScriptTagWithCommentBlockContainingHtmlCommentEnd() { PolicyFactory scriptSanitizer = new HtmlPolicyBuilder() @@ -1007,6 +1172,12 @@ public static final void testTextareaIsNotTextArea() { assertEquals("x", textAreaPolicy.sanitize(input)); } + @Test + public static final void testHtmlPolicyBuilderDefinitionWithNoAttributesDefinedGlobally() { + // Does not crash with a runtime exception + new HtmlPolicyBuilder().allowElements().allowAttributes().globally().toFactory(); + } + @Test public static final void testCSSFontSize() { HtmlPolicyBuilder builder = new HtmlPolicyBuilder(); diff --git a/src/test/java/org/owasp/html/SanitizersTest.java b/src/test/java/org/owasp/html/SanitizersTest.java index 25ede957..d14d1156 100644 --- a/src/test/java/org/owasp/html/SanitizersTest.java +++ b/src/test/java/org/owasp/html/SanitizersTest.java @@ -158,6 +158,58 @@ public static final void testImages() { ); } + @Test + public static final void testIntegerAttributePolicy() { + PolicyFactory s = Sanitizers.IMAGES; + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + + assertEquals( + "\"y\"", + s.sanitize( + "\"y\"") + ); + } + @Test public static final void testLinks() { PolicyFactory s = Sanitizers.LINKS;