Skip to content

Commit

Permalink
Do not lcase element or attribute names that match SVG or MathML name… (
Browse files Browse the repository at this point in the history
#206)

* Do not lcase element or attribute names that match SVG or MathML names exactly

> Currently all names are converted to lowercase which is ok when
> you're using it for HTML only, but if there is an SVG image nested
> inside the HTML it breaks.  For example, when `viewBox` attribute is
> converted to `viewbox` the image is not displayed correctly.

This commit splits *HtmlLexer*.*canonicalName* into variants which preserve
items on whitelists derived from the SVG and MathML specifications, and
adjusts callers of *canonicalName* to use the appropriate variant.

Fixes #182

* add unittests for mixed-case SVG names
  • Loading branch information
mikesamuel authored Jul 13, 2020
1 parent fd6b2dd commit eb6ef02
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

/**
* Encapsulates all the information needed by the
* {@link ElementAndAttributePolicySanitizerPolicy} to sanitize one kind
* {@link ElementAndAttributePolicyBasedSanitizerPolicy} to sanitize one kind
* of element.
*/
@Immutable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void openTag(String elementName, List<String> attrs) {

adjustedElementName = policies.elPolicy.apply(elementName, attrs);
if (adjustedElementName != null) {
adjustedElementName = HtmlLexer.canonicalName(adjustedElementName);
adjustedElementName = HtmlLexer.canonicalElementName(adjustedElementName);
}
} else {
adjustedElementName = null;
Expand Down
155 changes: 144 additions & 11 deletions src/main/java/org/owasp/html/HtmlLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,27 @@ public HtmlLexer(String input) {

/**
* Normalize case of names that are not name-spaced. This lower-cases HTML
* element and attribute names, but not ones for embedded SVG or MATHML.
* element names, but not ones for embedded SVG or MathML.
*/
static String canonicalName(String elementOrAttribName) {
return elementOrAttribName.indexOf(':') >= 0
? elementOrAttribName : Strings.toLowerCase(elementOrAttribName);
static String canonicalElementName(String elementName) {
return elementName.indexOf(':') >= 0 || mixedCaseForeignElementNames.contains(elementName)
? elementName : Strings.toLowerCase(elementName);
}

/**
* Normalize case of names that are not name-spaced. This lower-cases HTML
* attribute names, but not ones for embedded SVG or MathML.
*/
static String canonicalAttributeName(String attribName) {
return attribName.indexOf(':') >= 0 || mixedCaseForeignAttributeNames.contains(attribName)
? attribName : Strings.toLowerCase(attribName);
}

/**
* Normalize case of keywords in attribute values.
*/
public static String canonicalKeywordAttributeValue(String keywordValue) {
return Strings.toLowerCase(keywordValue);
}

/**
Expand Down Expand Up @@ -243,16 +259,133 @@ private void pushbackToken(HtmlToken token) {

/** Can the attribute appear in HTML without a value. */
private static boolean isValuelessAttribute(String attribName) {
boolean valueless = VALUELESS_ATTRIB_NAMES.contains(
Strings.toLowerCase(attribName));
return valueless;
return VALUELESS_ATTRIB_NAMES.contains(canonicalAttributeName(attribName));
}

// From http://issues.apache.org/jira/browse/XALANC-519
private static final Set<String> VALUELESS_ATTRIB_NAMES = ImmutableSet.of(
"checked", "compact", "declare", "defer", "disabled",
"ismap", "multiple", "nohref", "noresize", "noshade",
"nowrap", "readonly", "selected");

private static final ImmutableSet<String> mixedCaseForeignAttributeNames = ImmutableSet.of(
"attributeName",
"attributeType",
"baseFrequency",
"baseProfile",
"calcMode",
"clipPathUnits",
"contentScriptType",
"defaultAction",
"definitionURL",
"diffuseConstant",
"edgeMode",
"externalResourcesRequired",
"filterUnits",
"focusHighlight",
"gradientTransform",
"gradientUnits",
"initialVisibility",
"kernelMatrix",
"kernelUnitLength",
"keyPoints",
"keySplines",
"keyTimes",
"lengthAdjust",
"limitingConeAngle",
"markerHeight",
"markerUnits",
"markerWidth",
"maskContentUnits",
"maskUnits",
"mediaCharacterEncoding",
"mediaContentEncodings",
"mediaSize",
"mediaTime",
"numOctaves",
"pathLength",
"patternContentUnits",
"patternTransform",
"patternUnits",
"playbackOrder",
"pointsAtX",
"pointsAtY",
"pointsAtZ",
"preserveAlpha",
"preserveAspectRatio",
"primitiveUnits",
"refX",
"refY",
"repeatCount",
"repeatDur",
"requiredExtensions",
"requiredFeatures",
"requiredFonts",
"requiredFormats",
"schemaLocation",
"snapshotTime",
"specularConstant",
"specularExponent",
"spreadMethod",
"startOffset",
"stdDeviation",
"stitchTiles",
"surfaceScale",
"syncBehavior",
"syncBehaviorDefault",
"syncMaster",
"syncTolerance",
"syncToleranceDefault",
"systemLanguage",
"tableValues",
"targetX",
"targetY",
"textLength",
"timelineBegin",
"transformBehavior",
"viewBox",
"xChannelSelector",
"yChannelSelector",
"zoomAndPan"
);

private static final ImmutableSet<String> mixedCaseForeignElementNames = ImmutableSet.of(
"animateColor",
"animateMotion",
"animateTransform",
"clipPath",
"feBlend",
"feColorMatrix",
"feComponentTransfer",
"feComposite",
"feConvolveMatrix",
"feDiffuseLighting",
"feDisplacementMap",
"feDistantLight",
"feDropShadow",
"feFlood",
"feFuncA",
"feFuncB",
"feFuncG",
"feFuncR",
"feGaussianBlur",
"feImage",
"feMerge",
"feMergeNode",
"feMorphology",
"feOffset",
"fePointLight",
"feSpecularLighting",
"feSpotLight",
"feTile",
"feTurbulence",
"foreignObject",
"linearGradient",
"radialGradient",
"solidColor",
"textArea",
"textPath"
);
}

/**
Expand Down Expand Up @@ -311,7 +444,7 @@ protected HtmlToken produce() {
switch (token.type) {
case TAGBEGIN:
{
String canonTagName = canonicalName(
String canonTagName = canonicalElementName(
token.start + 1, token.end);
if (HtmlTextEscapingMode.isTagFollowedByLiteralContent(
canonTagName)) {
Expand Down Expand Up @@ -478,7 +611,7 @@ private HtmlToken parseToken() {
if (this.inEscapeExemptBlock
&& '/' == input.charAt(start + 1)
&& textEscapingMode != HtmlTextEscapingMode.PLAIN_TEXT
&& canonicalName(start + 2, end)
&& canonicalElementName(start + 2, end)
.equals(escapeExemptTagName)) {
this.inEscapeExemptBlock = false;
this.escapeExemptTagName = null;
Expand Down Expand Up @@ -612,8 +745,8 @@ && canonicalName(start + 2, end)
return result;
}

private String canonicalName(int start, int end) {
return HtmlLexer.canonicalName(input.substring(start, end));
private String canonicalElementName(int start, int end) {
return HtmlLexer.canonicalElementName(input.substring(start, end));
}

private static boolean isIdentStart(char ch) {
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/org/owasp/html/HtmlPolicyBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ public HtmlPolicyBuilder allowElements(
ElementPolicy policy, String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
ElementPolicy newPolicy = ElementPolicy.Util.join(
elPolicies.get(elementName), policy);
// Don't remove if newPolicy is the always reject policy since we want
Expand Down Expand Up @@ -286,7 +286,7 @@ public HtmlPolicyBuilder allowCommonBlockElements() {
public HtmlPolicyBuilder allowTextIn(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
textContainers.put(elementName, true);
}
return this;
Expand All @@ -305,7 +305,7 @@ public HtmlPolicyBuilder allowTextIn(String... elementNames) {
public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
textContainers.put(elementName, false);
}
return this;
Expand All @@ -321,7 +321,7 @@ public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
skipIssueTagMap.put(elementName, HtmlTagSkipType.DO_NOT_SKIP);
}
return this;
Expand All @@ -336,7 +336,7 @@ public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
skipIssueTagMap.put(elementName, HtmlTagSkipType.SKIP);
}
return this;
Expand All @@ -349,7 +349,7 @@ public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
public AttributeBuilder allowAttributes(String... attributeNames) {
ImmutableList.Builder<String> b = ImmutableList.builder();
for (String attributeName : attributeNames) {
b.add(HtmlLexer.canonicalName(attributeName));
b.add(HtmlLexer.canonicalAttributeName(attributeName));
}
return new AttributeBuilder(b.build());
}
Expand Down Expand Up @@ -432,7 +432,7 @@ public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) {
this.extraRelsForLinks = Sets.newLinkedHashSet();
}
for (String linkValue : linkValues) {
linkValue = HtmlLexer.canonicalName(linkValue);
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
Preconditions.checkArgument(
!Strings.containsHtmlSpace(linkValue),
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
Expand All @@ -456,7 +456,7 @@ public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) {
this.skipRelsForLinks = Sets.newLinkedHashSet();
}
for (String linkValue : linkValues) {
linkValue = HtmlLexer.canonicalName(linkValue);
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
Preconditions.checkArgument(
!Strings.containsHtmlSpace(linkValue),
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
Expand Down Expand Up @@ -980,7 +980,7 @@ public HtmlPolicyBuilder globally() {
public HtmlPolicyBuilder onElements(String... elementNames) {
ImmutableList.Builder<String> b = ImmutableList.builder();
for (String elementName : elementNames) {
b.add(HtmlLexer.canonicalName(elementName));
b.add(HtmlLexer.canonicalElementName(elementName));
}
return HtmlPolicyBuilder.this.allowAttributesOnElements(
policy, attributeNames, b.build());
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/owasp/html/HtmlSanitizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public static void sanitize(
break;
case TAGBEGIN:
if (htmlContent.charAt(token.start + 1) == '/') { // A close tag.
receiver.closeTag(HtmlLexer.canonicalName(
receiver.closeTag(HtmlLexer.canonicalElementName(
htmlContent.substring(token.start + 2, token.end)));
while (lexer.hasNext()
&& lexer.next().type != HtmlTokenType.TAGEND) {
Expand All @@ -173,7 +173,7 @@ public static void sanitize(
} else {
attrsReadyForName = false;
}
attrs.add(HtmlLexer.canonicalName(
attrs.add(HtmlLexer.canonicalAttributeName(
htmlContent.substring(tagBodyToken.start, tagBodyToken.end)));
break;
case ATTRVALUE:
Expand All @@ -191,7 +191,7 @@ public static void sanitize(
attrs.add(attrs.getLast());
}
receiver.openTag(
HtmlLexer.canonicalName(
HtmlLexer.canonicalElementName(
htmlContent.substring(token.start + 1, token.end)),
attrs);
}
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/owasp/html/HtmlStreamRenderer.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ private void writeOpenTag(
attrIt.hasNext();) {
String name = attrIt.next();
String value = attrIt.next();
name = HtmlLexer.canonicalName(name);
name = HtmlLexer.canonicalAttributeName(name);
if (!isValidHtmlName(name)) {
error("Invalid attr name", name);
continue;
Expand Down Expand Up @@ -234,7 +234,7 @@ public final void closeTag(String elementName) {
private final void writeCloseTag(String uncanonElementName)
throws IOException {
if (!open) { throw new IllegalStateException(); }
String elementName = HtmlLexer.canonicalName(uncanonElementName);
String elementName = HtmlLexer.canonicalElementName(uncanonElementName);
if (!isValidHtmlName(elementName)) {
error("Invalid element name", elementName);
return;
Expand Down Expand Up @@ -386,7 +386,7 @@ static boolean isValidHtmlName(String name) {
* that has more consistent semantics.
*/
static String safeName(String unsafeElementName) {
String elementName = HtmlLexer.canonicalName(unsafeElementName);
String elementName = HtmlLexer.canonicalElementName(unsafeElementName);

// Substitute a reliably non-raw-text element for raw-text and
// plain-text elements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void openTag(String elementName, List<String> attrs) {
if (DEBUG) {
dumpState("open " + elementName);
}
String canonElementName = HtmlLexer.canonicalName(elementName);
String canonElementName = HtmlLexer.canonicalElementName(elementName);

int elIndex = METADATA.indexForName(canonElementName);
// Treat unrecognized tags as void, but emit closing tags in closeTag().
Expand Down Expand Up @@ -238,7 +238,7 @@ public void closeTag(String elementName) {
if (DEBUG) {
dumpState("close " + elementName);
}
String canonElementName = HtmlLexer.canonicalName(elementName);
String canonElementName = HtmlLexer.canonicalElementName(elementName);

int elIndex = METADATA.indexForName(canonElementName);
if (elIndex == UNRECOGNIZED_TAG) { // Allow unrecognized end tags through.
Expand Down
19 changes: 19 additions & 0 deletions src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,25 @@ public static final void testTableStructure() {
sanitized);
}

@Test
public static final void testSvgNames() {
PolicyFactory policyFactory = new HtmlPolicyBuilder()
.allowElements("svg", "animateColor")
.allowAttributes("viewBox").onElements("svg")
.toFactory();
String svg = "<svg viewBox=\"0 0 0 0\"><animateColor></animateColor></svg>";
assertEquals(svg, policyFactory.sanitize(svg));
}

@Test
public static final void testTextareaIsNotTextArea() {
String input = "<textarea>x</textarea><textArea>y</textArea>";
PolicyFactory textareaPolicy = new HtmlPolicyBuilder().allowElements("textarea").toFactory();
PolicyFactory textAreaPolicy = new HtmlPolicyBuilder().allowElements("textArea").toFactory();
assertEquals("<textarea>x</textarea>y", textareaPolicy.sanitize(input));
assertEquals("x<textArea>y</textArea>", textAreaPolicy.sanitize(input));
}

private static String apply(HtmlPolicyBuilder b) {
return apply(b, EXAMPLE);
}
Expand Down

0 comments on commit eb6ef02

Please sign in to comment.