diff --git a/src/main/java/org/owasp/html/Encoding.java b/src/main/java/org/owasp/html/Encoding.java
index 20dbed99..7ffc7e1e 100644
--- a/src/main/java/org/owasp/html/Encoding.java
+++ b/src/main/java/org/owasp/html/Encoding.java
@@ -43,6 +43,7 @@ public final class Encoding {
* @return text/plain
* @deprecated specify whether s is in an attribute value
*/
+ @Deprecated
public static String decodeHtml(String s) {
return decodeHtml(s, false);
}
diff --git a/src/main/java/org/owasp/html/HtmlEntities.java b/src/main/java/org/owasp/html/HtmlEntities.java
index ca7a2248..c7138d04 100644
--- a/src/main/java/org/owasp/html/HtmlEntities.java
+++ b/src/main/java/org/owasp/html/HtmlEntities.java
@@ -2308,6 +2308,7 @@ final class HtmlEntities {
* @return The offset after the end of the decoded sequence in {@code html}.
* @deprecated specify whether html is in an attribute value.
*/
+ @Deprecated
public static int appendDecodedEntity(
String html, int offset, int limit, StringBuilder sb) {
return appendDecodedEntity(html, offset, limit, false, sb);
diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java
index 82ee2554..bae6d13e 100644
--- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java
+++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java
@@ -1037,6 +1037,7 @@ public String apply(String elementName, List attrs) {
relValue = DEFAULT_RELS_ON_TARGETTED_LINKS_STR;
} else {
StringBuilder sb = new StringBuilder();
+ Set present = new HashSet();
if (relIndex >= 0) {
// Preserve values that are not explicitly skipped.
String rels = attrs.get(relIndex);
@@ -1047,7 +1048,9 @@ public String apply(String elementName, List attrs) {
if (skip.isEmpty()
|| !skip.contains(
Strings.toLowerCase(rels.substring(left, i)))) {
- sb.append(rels, left, i).append(' ');
+ String rel = rels.substring(left, i);
+ present.add(rel);
+ sb.append(rel).append(' ');
}
}
left = i + 1;
@@ -1055,17 +1058,24 @@ public String apply(String elementName, List attrs) {
}
}
for (String s : extra) {
- sb.append(s).append(' ');
+ if (!present.contains(s)) {
+ sb.append(s).append(' ');
+ present.add(s);
+ }
}
if (hasTarget) {
for (String s : whenTargetPresent) {
- sb.append(s).append(' ');
+ if (!present.contains(s)) {
+ sb.append(s).append(' ');
+ present.add(s);
+ }
}
}
int sblen = sb.length();
if (sblen == 0) {
relValue = "";
} else {
+ // Drop the trailing space left by the last append.
relValue = sb.substring(0, sb.length() - 1);
}
}
diff --git a/src/main/java/org/owasp/html/HtmlStreamRenderer.java b/src/main/java/org/owasp/html/HtmlStreamRenderer.java
index bb14e3ee..b81283aa 100644
--- a/src/main/java/org/owasp/html/HtmlStreamRenderer.java
+++ b/src/main/java/org/owasp/html/HtmlStreamRenderer.java
@@ -33,6 +33,7 @@
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
+import java.util.Set;
import javax.annotation.WillCloseWhenClosed;
import javax.annotation.concurrent.NotThreadSafe;
@@ -57,6 +58,8 @@ public class HtmlStreamRenderer implements HtmlStreamEventReceiver {
private StringBuilder pendingUnescaped;
private HtmlTextEscapingMode escapingMode = HtmlTextEscapingMode.PCDATA;
private boolean open;
+ /** The number of elements named in {@link #foreignContentRootElementNames} that have been opened and not yet closed. */
+ private int foreignContentDepth = 0;
/**
* Factory.
@@ -168,7 +171,25 @@ private void writeOpenTag(
return;
}
- escapingMode = HtmlTextEscapingMode.getModeForTag(elementName);
+ if (foreignContentRootElementNames.contains(elementName)) {
+ foreignContentDepth += 1;
+ }
+
+ HtmlTextEscapingMode tentativeEscapingMode = HtmlTextEscapingMode.getModeForTag(elementName);
+ if (foreignContentDepth == 0) {
+ escapingMode = tentativeEscapingMode;
+ } else {
+ switch (tentativeEscapingMode) {
+ case PCDATA:
+ case VOID:
+ escapingMode = tentativeEscapingMode;
+ break;
+ default: // escape special characters but do not allow tags
+ escapingMode = HtmlTextEscapingMode.RCDATA;
+ break;
+ }
+ }
+
switch (escapingMode) {
case CDATA_SOMETIMES:
@@ -240,6 +261,10 @@ private final void writeCloseTag(String uncanonElementName)
return;
}
+ if (foreignContentDepth != 0 && foreignContentRootElementNames.contains(elementName)) {
+ foreignContentDepth -= 1;
+ }
+
if (pendingUnescaped != null) {
if (!lastTagOpened.equals(elementName)) {
error("Tag content cannot appear inside CDATA element", elementName);
@@ -436,4 +461,6 @@ public void close() throws IOException {
private static boolean isTagEnd(char ch) {
return ch < 63 && 0 != (TAG_ENDS & (1L << ch));
}
+
+ private static final Set foreignContentRootElementNames = Set.of("svg", "math");
}
diff --git a/src/test/java/org/owasp/html/Benchmark.java b/src/test/java/org/owasp/html/Benchmark.java
index 2a937fc6..e4a917ad 100644
--- a/src/test/java/org/owasp/html/Benchmark.java
+++ b/src/test/java/org/owasp/html/Benchmark.java
@@ -58,7 +58,7 @@ public class Benchmark {
* specifies a benchmark to run and unspecified ones are not run.
*/
public static void main(String[] args) throws Exception {
- String html = Files.readString(new File(args[0]).toPath(), StandardCharsets.UTF_8);
+ String html = new String(Files.readAllBytes(new File(args[0]).toPath()), StandardCharsets.UTF_8);
boolean timeLibhtmlparser = true;
boolean timeSanitize = true;
diff --git a/src/test/java/org/owasp/html/HtmlLexerTest.java b/src/test/java/org/owasp/html/HtmlLexerTest.java
index d2a680df..2ebf55ea 100644
--- a/src/test/java/org/owasp/html/HtmlLexerTest.java
+++ b/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -45,12 +45,12 @@ public class HtmlLexerTest extends TestCase {
@Test
public final void testHtmlLexer() throws Exception {
// Do the lexing.
- String input = new String(Files.readString(Paths.get(getClass().getResource("htmllexerinput1.html").toURI()), StandardCharsets.UTF_8));
+ String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
StringBuilder actual = new StringBuilder();
lex(input, actual);
// Get the golden.
- String golden = new String(Files.readString(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI()), StandardCharsets.UTF_8));
+ String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
// Compare.
assertEquals(golden, actual.toString());
diff --git a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java
index 43d15788..03a333cd 100644
--- a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java
+++ b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java
@@ -913,7 +913,7 @@ public static final void testLinkRelsWhenRelPresent() {
}
@Test
- public static final void testRelLinksWhenRelisPartOfData() {
+ public final void testRelLinksWhenRelIsPartOfData() {
PolicyFactory pf = new HtmlPolicyBuilder()
.allowElements("a")
.allowAttributes("href").onElements("a")
@@ -922,7 +922,7 @@ public static final void testRelLinksWhenRelisPartOfData() {
.allowStandardUrlProtocols()
.toFactory();
String toSanitize = "test";
- assertTrue("Failure in testRelLinksWhenRelisPartOfData", pf.sanitize(toSanitize).equals(toSanitize));
+ assertEquals(toSanitize, pf.sanitize(toSanitize));
}
@Test