diff --git a/src/main/java/org/owasp/html/Encoding.java b/src/main/java/org/owasp/html/Encoding.java index 20dbed99..7ffc7e1e 100644 --- a/src/main/java/org/owasp/html/Encoding.java +++ b/src/main/java/org/owasp/html/Encoding.java @@ -43,6 +43,7 @@ public final class Encoding { * @return text/plain * @deprecated specify whether s is in an attribute value */ + @Deprecated public static String decodeHtml(String s) { return decodeHtml(s, false); } diff --git a/src/main/java/org/owasp/html/HtmlEntities.java b/src/main/java/org/owasp/html/HtmlEntities.java index ca7a2248..c7138d04 100644 --- a/src/main/java/org/owasp/html/HtmlEntities.java +++ b/src/main/java/org/owasp/html/HtmlEntities.java @@ -2308,6 +2308,7 @@ final class HtmlEntities { * @return The offset after the end of the decoded sequence in {@code html}. * @deprecated specify whether html is in an attribute value. */ + @Deprecated public static int appendDecodedEntity( String html, int offset, int limit, StringBuilder sb) { return appendDecodedEntity(html, offset, limit, false, sb); diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java index 82ee2554..bae6d13e 100644 --- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java +++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java @@ -1037,6 +1037,7 @@ public String apply(String elementName, List attrs) { relValue = DEFAULT_RELS_ON_TARGETTED_LINKS_STR; } else { StringBuilder sb = new StringBuilder(); + Set present = new HashSet(); if (relIndex >= 0) { // Preserve values that are not explicitly skipped. String rels = attrs.get(relIndex); @@ -1047,7 +1048,9 @@ public String apply(String elementName, List attrs) { if (skip.isEmpty() || !skip.contains( Strings.toLowerCase(rels.substring(left, i)))) { - sb.append(rels, left, i).append(' '); + String rel = rels.substring(left, i); + present.add(rel); + sb.append(rel).append(' '); } } left = i + 1; @@ -1055,17 +1058,24 @@ public String apply(String elementName, List attrs) { } } for (String s : extra) { - sb.append(s).append(' '); + if (!present.contains(s)) { + sb.append(s).append(' '); + present.add(s); + } } if (hasTarget) { for (String s : whenTargetPresent) { - sb.append(s).append(' '); + if (!present.contains(s)) { + sb.append(s).append(' '); + present.add(s); + } } } int sblen = sb.length(); if (sblen == 0) { relValue = ""; } else { + // Trim last space. relValue = sb.substring(0, sb.length() - 1); } } diff --git a/src/main/java/org/owasp/html/HtmlStreamRenderer.java b/src/main/java/org/owasp/html/HtmlStreamRenderer.java index bb14e3ee..b81283aa 100644 --- a/src/main/java/org/owasp/html/HtmlStreamRenderer.java +++ b/src/main/java/org/owasp/html/HtmlStreamRenderer.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.Set; import javax.annotation.WillCloseWhenClosed; import javax.annotation.concurrent.NotThreadSafe; @@ -57,6 +58,8 @@ public class HtmlStreamRenderer implements HtmlStreamEventReceiver { private StringBuilder pendingUnescaped; private HtmlTextEscapingMode escapingMode = HtmlTextEscapingMode.PCDATA; private boolean open; + /** The count of {@link #foreignContentRootElementNames} opened and not subsequently closed. */ + private int foreignContentDepth = 0; /** * Factory. @@ -168,7 +171,25 @@ private void writeOpenTag( return; } - escapingMode = HtmlTextEscapingMode.getModeForTag(elementName); + if (foreignContentRootElementNames.contains(elementName)) { + foreignContentDepth += 1; + } + + HtmlTextEscapingMode tentativeEscapingMode = HtmlTextEscapingMode.getModeForTag(elementName); + if (foreignContentDepth == 0) { + escapingMode = tentativeEscapingMode; + } else { + switch (tentativeEscapingMode) { + case PCDATA: + case VOID: + escapingMode = tentativeEscapingMode; + break; + default: // escape special characters but do not allow tags + escapingMode = HtmlTextEscapingMode.RCDATA; + break; + } + } + switch (escapingMode) { case CDATA_SOMETIMES: @@ -240,6 +261,10 @@ private final void writeCloseTag(String uncanonElementName) return; } + if (foreignContentDepth != 0 && foreignContentRootElementNames.contains(elementName)) { + foreignContentDepth -= 1; + } + if (pendingUnescaped != null) { if (!lastTagOpened.equals(elementName)) { error("Tag content cannot appear inside CDATA element", elementName); @@ -436,4 +461,6 @@ public void close() throws IOException { private static boolean isTagEnd(char ch) { return ch < 63 && 0 != (TAG_ENDS & (1L << ch)); } + + private static final Set foreignContentRootElementNames = Set.of("svg", "math"); } diff --git a/src/test/java/org/owasp/html/Benchmark.java b/src/test/java/org/owasp/html/Benchmark.java index 2a937fc6..e4a917ad 100644 --- a/src/test/java/org/owasp/html/Benchmark.java +++ b/src/test/java/org/owasp/html/Benchmark.java @@ -58,7 +58,7 @@ public class Benchmark { * specifies a benchmark to run and unspecified ones are not run. */ public static void main(String[] args) throws Exception { - String html = Files.readString(new File(args[0]).toPath(), StandardCharsets.UTF_8); + String html = new String(Files.readAllBytes(new File(args[0]).toPath()), StandardCharsets.UTF_8); boolean timeLibhtmlparser = true; boolean timeSanitize = true; diff --git a/src/test/java/org/owasp/html/HtmlLexerTest.java b/src/test/java/org/owasp/html/HtmlLexerTest.java index d2a680df..2ebf55ea 100644 --- a/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -45,12 +45,12 @@ public class HtmlLexerTest extends TestCase { @Test public final void testHtmlLexer() throws Exception { // Do the lexing. - String input = new String(Files.readString(Paths.get(getClass().getResource("htmllexerinput1.html").toURI()), StandardCharsets.UTF_8)); + String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8); StringBuilder actual = new StringBuilder(); lex(input, actual); // Get the golden. - String golden = new String(Files.readString(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI()), StandardCharsets.UTF_8)); + String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8); // Compare. assertEquals(golden, actual.toString()); diff --git a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java index 43d15788..03a333cd 100644 --- a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java +++ b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java @@ -913,7 +913,7 @@ public static final void testLinkRelsWhenRelPresent() { } @Test - public static final void testRelLinksWhenRelisPartOfData() { + public final void testRelLinksWhenRelIsPartOfData() { PolicyFactory pf = new HtmlPolicyBuilder() .allowElements("a") .allowAttributes("href").onElements("a") @@ -922,7 +922,7 @@ public static final void testRelLinksWhenRelisPartOfData() { .allowStandardUrlProtocols() .toFactory(); String toSanitize = "test"; - assertTrue("Failure in testRelLinksWhenRelisPartOfData", pf.sanitize(toSanitize).equals(toSanitize)); + assertEquals(toSanitize, pf.sanitize(toSanitize)); } @Test