From eb6ef0297b5a960ac25a85a6d8a56803ccc11a0c Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Mon, 13 Jul 2020 11:27:08 -0400 Subject: [PATCH] =?UTF-8?q?Do=20not=20lcase=20element=20or=20attribute=20n?= =?UTF-8?q?ames=20that=20match=20SVG=20or=20MathML=20name=E2=80=A6=20(#206?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Do not lcase element or attribute names that match SVG or MathML names exactly > Currently all names are converted to lowercase which is ok when > you're using it for HTML only, but if there is an SVG image nested > inside the HTML it breaks. For example, when `viewBox` attribute is > converted to `viewbox` the image is not displayed correctly. This commit splits *HtmlLexer*.*canonicalName* into variants which preserve items on whitelists derived from the SVG and MathML specifications, and adjusts callers of *canonicalName* to use the appropriate variant. Fixes #182 * add unittests for mixed-case SVG names --- .../html/ElementAndAttributePolicies.java | 2 +- ...ndAttributePolicyBasedSanitizerPolicy.java | 2 +- src/main/java/org/owasp/html/HtmlLexer.java | 155 ++++++++++++++++-- .../org/owasp/html/HtmlPolicyBuilder.java | 18 +- .../java/org/owasp/html/HtmlSanitizer.java | 6 +- .../org/owasp/html/HtmlStreamRenderer.java | 6 +- .../TagBalancingHtmlStreamEventReceiver.java | 4 +- .../org/owasp/html/HtmlPolicyBuilderTest.java | 19 +++ 8 files changed, 182 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/owasp/html/ElementAndAttributePolicies.java b/src/main/java/org/owasp/html/ElementAndAttributePolicies.java index 219a3d80..8f2cdca6 100644 --- a/src/main/java/org/owasp/html/ElementAndAttributePolicies.java +++ b/src/main/java/org/owasp/html/ElementAndAttributePolicies.java @@ -35,7 +35,7 @@ /** * Encapsulates all the information needed by the - * {@link ElementAndAttributePolicySanitizerPolicy} to sanitize one kind + * {@link ElementAndAttributePolicyBasedSanitizerPolicy} to sanitize one kind * of element. */ @Immutable diff --git a/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java index d578221f..c8da4617 100644 --- a/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java +++ b/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java @@ -144,7 +144,7 @@ public void openTag(String elementName, List attrs) { adjustedElementName = policies.elPolicy.apply(elementName, attrs); if (adjustedElementName != null) { - adjustedElementName = HtmlLexer.canonicalName(adjustedElementName); + adjustedElementName = HtmlLexer.canonicalElementName(adjustedElementName); } } else { adjustedElementName = null; diff --git a/src/main/java/org/owasp/html/HtmlLexer.java b/src/main/java/org/owasp/html/HtmlLexer.java index ed326b15..00fcc7dc 100644 --- a/src/main/java/org/owasp/html/HtmlLexer.java +++ b/src/main/java/org/owasp/html/HtmlLexer.java @@ -55,11 +55,27 @@ public HtmlLexer(String input) { /** * Normalize case of names that are not name-spaced. This lower-cases HTML - * element and attribute names, but not ones for embedded SVG or MATHML. + * element names, but not ones for embedded SVG or MathML. */ - static String canonicalName(String elementOrAttribName) { - return elementOrAttribName.indexOf(':') >= 0 - ? elementOrAttribName : Strings.toLowerCase(elementOrAttribName); + static String canonicalElementName(String elementName) { + return elementName.indexOf(':') >= 0 || mixedCaseForeignElementNames.contains(elementName) + ? elementName : Strings.toLowerCase(elementName); + } + + /** + * Normalize case of names that are not name-spaced. This lower-cases HTML + * attribute names, but not ones for embedded SVG or MathML. + */ + static String canonicalAttributeName(String attribName) { + return attribName.indexOf(':') >= 0 || mixedCaseForeignAttributeNames.contains(attribName) + ? attribName : Strings.toLowerCase(attribName); + } + + /** + * Normalize case of keywords in attribute values. + */ + public static String canonicalKeywordAttributeValue(String keywordValue) { + return Strings.toLowerCase(keywordValue); } /** @@ -243,9 +259,7 @@ private void pushbackToken(HtmlToken token) { /** Can the attribute appear in HTML without a value. */ private static boolean isValuelessAttribute(String attribName) { - boolean valueless = VALUELESS_ATTRIB_NAMES.contains( - Strings.toLowerCase(attribName)); - return valueless; + return VALUELESS_ATTRIB_NAMES.contains(canonicalAttributeName(attribName)); } // From http://issues.apache.org/jira/browse/XALANC-519 @@ -253,6 +267,125 @@ private static boolean isValuelessAttribute(String attribName) { "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", "selected"); + + private static final ImmutableSet mixedCaseForeignAttributeNames = ImmutableSet.of( + "attributeName", + "attributeType", + "baseFrequency", + "baseProfile", + "calcMode", + "clipPathUnits", + "contentScriptType", + "defaultAction", + "definitionURL", + "diffuseConstant", + "edgeMode", + "externalResourcesRequired", + "filterUnits", + "focusHighlight", + "gradientTransform", + "gradientUnits", + "initialVisibility", + "kernelMatrix", + "kernelUnitLength", + "keyPoints", + "keySplines", + "keyTimes", + "lengthAdjust", + "limitingConeAngle", + "markerHeight", + "markerUnits", + "markerWidth", + "maskContentUnits", + "maskUnits", + "mediaCharacterEncoding", + "mediaContentEncodings", + "mediaSize", + "mediaTime", + "numOctaves", + "pathLength", + "patternContentUnits", + "patternTransform", + "patternUnits", + "playbackOrder", + "pointsAtX", + "pointsAtY", + "pointsAtZ", + "preserveAlpha", + "preserveAspectRatio", + "primitiveUnits", + "refX", + "refY", + "repeatCount", + "repeatDur", + "requiredExtensions", + "requiredFeatures", + "requiredFonts", + "requiredFormats", + "schemaLocation", + "snapshotTime", + "specularConstant", + "specularExponent", + "spreadMethod", + "startOffset", + "stdDeviation", + "stitchTiles", + "surfaceScale", + "syncBehavior", + "syncBehaviorDefault", + "syncMaster", + "syncTolerance", + "syncToleranceDefault", + "systemLanguage", + "tableValues", + "targetX", + "targetY", + "textLength", + "timelineBegin", + "transformBehavior", + "viewBox", + "xChannelSelector", + "yChannelSelector", + "zoomAndPan" + ); + + private static final ImmutableSet mixedCaseForeignElementNames = ImmutableSet.of( + "animateColor", + "animateMotion", + "animateTransform", + "clipPath", + "feBlend", + "feColorMatrix", + "feComponentTransfer", + "feComposite", + "feConvolveMatrix", + "feDiffuseLighting", + "feDisplacementMap", + "feDistantLight", + "feDropShadow", + "feFlood", + "feFuncA", + "feFuncB", + "feFuncG", + "feFuncR", + "feGaussianBlur", + "feImage", + "feMerge", + "feMergeNode", + "feMorphology", + "feOffset", + "fePointLight", + "feSpecularLighting", + "feSpotLight", + "feTile", + "feTurbulence", + "foreignObject", + "linearGradient", + "radialGradient", + "solidColor", + "textArea", + "textPath" + ); } /** @@ -311,7 +444,7 @@ protected HtmlToken produce() { switch (token.type) { case TAGBEGIN: { - String canonTagName = canonicalName( + String canonTagName = canonicalElementName( token.start + 1, token.end); if (HtmlTextEscapingMode.isTagFollowedByLiteralContent( canonTagName)) { @@ -478,7 +611,7 @@ private HtmlToken parseToken() { if (this.inEscapeExemptBlock && '/' == input.charAt(start + 1) && textEscapingMode != HtmlTextEscapingMode.PLAIN_TEXT - && canonicalName(start + 2, end) + && canonicalElementName(start + 2, end) .equals(escapeExemptTagName)) { this.inEscapeExemptBlock = false; this.escapeExemptTagName = null; @@ -612,8 +745,8 @@ && canonicalName(start + 2, end) return result; } - private String canonicalName(int start, int end) { - return HtmlLexer.canonicalName(input.substring(start, end)); + private String canonicalElementName(int start, int end) { + return HtmlLexer.canonicalElementName(input.substring(start, end)); } private static boolean isIdentStart(char ch) { diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java index 46b42230..aa1b51a5 100644 --- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java +++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java @@ -238,7 +238,7 @@ public HtmlPolicyBuilder allowElements( ElementPolicy policy, String... elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); + elementName = HtmlLexer.canonicalElementName(elementName); ElementPolicy newPolicy = ElementPolicy.Util.join( elPolicies.get(elementName), policy); // Don't remove if newPolicy is the always reject policy since we want @@ -286,7 +286,7 @@ public HtmlPolicyBuilder allowCommonBlockElements() { public HtmlPolicyBuilder allowTextIn(String... elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); + elementName = HtmlLexer.canonicalElementName(elementName); textContainers.put(elementName, true); } return this; @@ -305,7 +305,7 @@ public HtmlPolicyBuilder allowTextIn(String... elementNames) { public HtmlPolicyBuilder disallowTextIn(String... elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); + elementName = HtmlLexer.canonicalElementName(elementName); textContainers.put(elementName, false); } return this; @@ -321,7 +321,7 @@ public HtmlPolicyBuilder disallowTextIn(String... elementNames) { public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); + elementName = HtmlLexer.canonicalElementName(elementName); skipIssueTagMap.put(elementName, HtmlTagSkipType.DO_NOT_SKIP); } return this; @@ -336,7 +336,7 @@ public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) { public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); + elementName = HtmlLexer.canonicalElementName(elementName); skipIssueTagMap.put(elementName, HtmlTagSkipType.SKIP); } return this; @@ -349,7 +349,7 @@ public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) { public AttributeBuilder allowAttributes(String... attributeNames) { ImmutableList.Builder b = ImmutableList.builder(); for (String attributeName : attributeNames) { - b.add(HtmlLexer.canonicalName(attributeName)); + b.add(HtmlLexer.canonicalAttributeName(attributeName)); } return new AttributeBuilder(b.build()); } @@ -432,7 +432,7 @@ public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) { this.extraRelsForLinks = Sets.newLinkedHashSet(); } for (String linkValue : linkValues) { - linkValue = HtmlLexer.canonicalName(linkValue); + linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue); Preconditions.checkArgument( !Strings.containsHtmlSpace(linkValue), "spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")"); @@ -456,7 +456,7 @@ public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) { this.skipRelsForLinks = Sets.newLinkedHashSet(); } for (String linkValue : linkValues) { - linkValue = HtmlLexer.canonicalName(linkValue); + linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue); Preconditions.checkArgument( !Strings.containsHtmlSpace(linkValue), "spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")"); @@ -980,7 +980,7 @@ public HtmlPolicyBuilder globally() { public HtmlPolicyBuilder onElements(String... elementNames) { ImmutableList.Builder b = ImmutableList.builder(); for (String elementName : elementNames) { - b.add(HtmlLexer.canonicalName(elementName)); + b.add(HtmlLexer.canonicalElementName(elementName)); } return HtmlPolicyBuilder.this.allowAttributesOnElements( policy, attributeNames, b.build()); diff --git a/src/main/java/org/owasp/html/HtmlSanitizer.java b/src/main/java/org/owasp/html/HtmlSanitizer.java index f9e932e8..63d7ae95 100644 --- a/src/main/java/org/owasp/html/HtmlSanitizer.java +++ b/src/main/java/org/owasp/html/HtmlSanitizer.java @@ -152,7 +152,7 @@ public static void sanitize( break; case TAGBEGIN: if (htmlContent.charAt(token.start + 1) == '/') { // A close tag. - receiver.closeTag(HtmlLexer.canonicalName( + receiver.closeTag(HtmlLexer.canonicalElementName( htmlContent.substring(token.start + 2, token.end))); while (lexer.hasNext() && lexer.next().type != HtmlTokenType.TAGEND) { @@ -173,7 +173,7 @@ public static void sanitize( } else { attrsReadyForName = false; } - attrs.add(HtmlLexer.canonicalName( + attrs.add(HtmlLexer.canonicalAttributeName( htmlContent.substring(tagBodyToken.start, tagBodyToken.end))); break; case ATTRVALUE: @@ -191,7 +191,7 @@ public static void sanitize( attrs.add(attrs.getLast()); } receiver.openTag( - HtmlLexer.canonicalName( + HtmlLexer.canonicalElementName( htmlContent.substring(token.start + 1, token.end)), attrs); } diff --git a/src/main/java/org/owasp/html/HtmlStreamRenderer.java b/src/main/java/org/owasp/html/HtmlStreamRenderer.java index 019895a5..16645531 100644 --- a/src/main/java/org/owasp/html/HtmlStreamRenderer.java +++ b/src/main/java/org/owasp/html/HtmlStreamRenderer.java @@ -187,7 +187,7 @@ private void writeOpenTag( attrIt.hasNext();) { String name = attrIt.next(); String value = attrIt.next(); - name = HtmlLexer.canonicalName(name); + name = HtmlLexer.canonicalAttributeName(name); if (!isValidHtmlName(name)) { error("Invalid attr name", name); continue; @@ -234,7 +234,7 @@ public final void closeTag(String elementName) { private final void writeCloseTag(String uncanonElementName) throws IOException { if (!open) { throw new IllegalStateException(); } - String elementName = HtmlLexer.canonicalName(uncanonElementName); + String elementName = HtmlLexer.canonicalElementName(uncanonElementName); if (!isValidHtmlName(elementName)) { error("Invalid element name", elementName); return; @@ -386,7 +386,7 @@ static boolean isValidHtmlName(String name) { * that has more consistent semantics. */ static String safeName(String unsafeElementName) { - String elementName = HtmlLexer.canonicalName(unsafeElementName); + String elementName = HtmlLexer.canonicalElementName(unsafeElementName); // Substitute a reliably non-raw-text element for raw-text and // plain-text elements. diff --git a/src/main/java/org/owasp/html/TagBalancingHtmlStreamEventReceiver.java b/src/main/java/org/owasp/html/TagBalancingHtmlStreamEventReceiver.java index c94d5735..aa2eb075 100644 --- a/src/main/java/org/owasp/html/TagBalancingHtmlStreamEventReceiver.java +++ b/src/main/java/org/owasp/html/TagBalancingHtmlStreamEventReceiver.java @@ -97,7 +97,7 @@ public void openTag(String elementName, List attrs) { if (DEBUG) { dumpState("open " + elementName); } - String canonElementName = HtmlLexer.canonicalName(elementName); + String canonElementName = HtmlLexer.canonicalElementName(elementName); int elIndex = METADATA.indexForName(canonElementName); // Treat unrecognized tags as void, but emit closing tags in closeTag(). @@ -238,7 +238,7 @@ public void closeTag(String elementName) { if (DEBUG) { dumpState("close " + elementName); } - String canonElementName = HtmlLexer.canonicalName(elementName); + String canonElementName = HtmlLexer.canonicalElementName(elementName); int elIndex = METADATA.indexForName(canonElementName); if (elIndex == UNRECOGNIZED_TAG) { // Allow unrecognized end tags through. diff --git a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java index a06fafee..db75e4c7 100644 --- a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java +++ b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java @@ -975,6 +975,25 @@ public static final void testTableStructure() { sanitized); } + @Test + public static final void testSvgNames() { + PolicyFactory policyFactory = new HtmlPolicyBuilder() + .allowElements("svg", "animateColor") + .allowAttributes("viewBox").onElements("svg") + .toFactory(); + String svg = ""; + assertEquals(svg, policyFactory.sanitize(svg)); + } + + @Test + public static final void testTextareaIsNotTextArea() { + String input = ""; + PolicyFactory textareaPolicy = new HtmlPolicyBuilder().allowElements("textarea").toFactory(); + PolicyFactory textAreaPolicy = new HtmlPolicyBuilder().allowElements("textArea").toFactory(); + assertEquals("y", textareaPolicy.sanitize(input)); + assertEquals("x", textAreaPolicy.sanitize(input)); + } + private static String apply(HtmlPolicyBuilder b) { return apply(b, EXAMPLE); }