diff --git a/docs/maven.md b/docs/maven.md index 1983a31b..4a4c4670 100644 --- a/docs/maven.md +++ b/docs/maven.md @@ -1,4 +1,4 @@ -# Using with Maven +# Using with Maven The HTML Sanitizer is available from [Maven Central](https://search.maven.org/#browse%7C84770979) diff --git a/html-types/src/main/java/org/owasp/html/htmltypes/SafeHtmlMint.java b/html-types/src/main/java/org/owasp/html/htmltypes/SafeHtmlMint.java index 1027da03..c16a1a7f 100644 --- a/html-types/src/main/java/org/owasp/html/htmltypes/SafeHtmlMint.java +++ b/html-types/src/main/java/org/owasp/html/htmltypes/SafeHtmlMint.java @@ -33,6 +33,7 @@ import com.google.common.html.types.SafeHtml; import com.google.common.html.types.UncheckedConversions; +import org.owasp.html.Context; import org.owasp.html.HtmlChangeListener; import org.owasp.html.PolicyFactory; @@ -67,27 +68,57 @@ private SafeHtmlMint(PolicyFactory f) { /** A convenience function that sanitizes a string of HTML. */ public SafeHtml sanitize(@Nullable String html) { - return sanitize(html, null, null); + return sanitize(html, Context.DEFAULT, null, null); + } + + /** A convenience function that sanitizes a string of HTML. */ + public SafeHtml sanitize(@Nullable String html, @Nullable Context context) { + return sanitize(html, context, null, null); } /** * A convenience function that sanitizes a string of HTML and reports * the names of rejected element and attributes to listener. * @param html the string of HTML to sanitize. + * @param context the context of the document that will embed the output. * @param listener if non-null, receives notifications of tags and attributes * that were rejected by the policy. This may tie into intrusion * detection systems. - * @param context if {@code (listener != null)} then the context value passed - * with notifications. This can be used to let the listener know from - * which connection or request the questionable HTML was received. + * @param listenerContext if {@code (listener != null)} then the context + * value passed with notifications. This can be used to let the listener + * know from which connection or request the questionable HTML was + * received. * @return a string of safe HTML assuming the input policy factory produces * safe HTML. */ public SafeHtml sanitize( @Nullable String html, - @Nullable HtmlChangeListener listener, @Nullable CTX context) { + @Nullable HtmlChangeListener listener, + @Nullable CTX listenerContext) { + return sanitize(html, Context.DEFAULT, listener, listenerContext); + } + + /** + * A convenience function that sanitizes a string of HTML and reports + * the names of rejected element and attributes to listener. + * @param html the string of HTML to sanitize. + * @param context the context of the document that will embed the output. + * @param listener if non-null, receives notifications of tags and attributes + * that were rejected by the policy. This may tie into intrusion + * detection systems. + * @param listenerContext if {@code (listener != null)} then the context + * value passed with notifications. This can be used to let the listener + * know from which connection or request the questionable HTML was + * received. + * @return a string of safe HTML assuming the input policy factory produces + * safe HTML. + */ + public SafeHtml sanitize( + @Nullable String html, @Nullable Context context, + @Nullable HtmlChangeListener listener, + @Nullable CTX listenerContext) { if (html == null) { return SafeHtml.EMPTY; } return UncheckedConversions.safeHtmlFromStringKnownToSatisfyTypeContract( - f.sanitize(html, listener, context)); + f.sanitize(html, context, listener, listenerContext)); } } diff --git a/parent/pom.xml b/parent/pom.xml index cd25d9b4..d3a59a53 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -214,12 +214,23 @@ application while protecting against XSS. [2.0.1,) provided + + org.owasp + url + 1.2.4 + junit junit 4.12 test + + nu.validator.htmlparser + htmlparser + 1.4 + test + diff --git a/pom.xml b/pom.xml index 933ea025..7a5ccd93 100644 --- a/pom.xml +++ b/pom.xml @@ -67,6 +67,10 @@ annotations provided + + org.owasp + url + junit junit @@ -75,7 +79,6 @@ nu.validator.htmlparser htmlparser - 1.4 test diff --git a/src/main/java/org/owasp/html/AttributePolicy.java b/src/main/java/org/owasp/html/AttributePolicy.java index 7f279db5..ef7b8aa0 100644 --- a/src/main/java/org/owasp/html/AttributePolicy.java +++ b/src/main/java/org/owasp/html/AttributePolicy.java @@ -31,6 +31,7 @@ import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; +import java.util.Collections; import java.util.Set; import javax.annotation.CheckReturnValue; @@ -55,25 +56,64 @@ * * @return {@code null} to disallow the attribute or the adjusted value if * allowed. + * @deprecated prefer {@link V2#apply(String, String, String, Context)} */ + @Deprecated public @Nullable String apply( String elementName, String attributeName, String value); + /** + * Extends AttributePolicy that receives the embedding document context. + */ + public interface V2 extends AttributePolicy { + /** + * @param elementName the lower-case element name. + * @param attributeName the lower-case attribute name. + * @param value the attribute value without quotes and with HTML entities + * decoded. + * @param context about the document in which the sanitized attribute will + * be embedded. + * + * @return {@code null} to disallow the attribute or the adjusted value if + * allowed. + */ + public @Nullable String apply( + String elementName, String attributeName, String value, + Context context); + } + + /** Utilities for working with attribute policies. */ public static final class Util { + static Iterable unjoin(AttributePolicy.V2 p) { + if (p instanceof JoinedAttributePolicy) { + return ((JoinedAttributePolicy) p).policies; + } else { + return Collections.singleton(p); + } + } + + /** Adapts an old-style attribute policy to the new interface. */ + public static V2 adapt(AttributePolicy p) { + if (p instanceof V2) { + return (V2) p; + } + return new AttributePolicyAdapter(p); + } + /** * An attribute policy equivalent to applying all the given policies in * order, failing early if any of them fails. */ @CheckReturnValue - public static final AttributePolicy join(AttributePolicy... policies) { + public static final AttributePolicy.V2 join(AttributePolicy... policies) { AttributePolicyJoiner joiner = new AttributePolicyJoiner(); for (AttributePolicy p : policies) { if (p != null) { - joiner.unroll(p); + joiner.unroll(adapt(p)); } } @@ -81,17 +121,17 @@ public static final AttributePolicy join(AttributePolicy... policies) { } static final class AttributePolicyJoiner - extends JoinHelper { + extends JoinHelper { AttributePolicyJoiner() { - super(AttributePolicy.class, + super(AttributePolicy.V2.class, JoinableAttributePolicy.class, REJECT_ALL_ATTRIBUTE_POLICY, IDENTITY_ATTRIBUTE_POLICY); } @Override - Optional> split(AttributePolicy x) { + Optional> split(AttributePolicy.V2 x) { if (x instanceof JoinedAttributePolicy) { return Optional.of(((JoinedAttributePolicy) x).policies); } else { @@ -100,34 +140,62 @@ Optional> split(AttributePolicy x) { } @Override - AttributePolicy rejoin(Set xs) { + AttributePolicy.V2 rejoin(Set xs) { return new JoinedAttributePolicy(xs); } } + + /** The old apply method forwards a null context to the V2 apply method. */ + public static abstract class AbstractV2AttributePolicy implements V2 { + public final @Nullable String apply( + String elementName, String attributeName, String value) { + return apply(elementName, attributeName, value, null); + } + } + + static final class AttributePolicyAdapter + extends AbstractV2AttributePolicy { + + final AttributePolicy p; + + AttributePolicyAdapter(AttributePolicy p) { + this.p = p; + } + + public String apply( + String elementName, String attributeName, String value, + Context context) { + return p.apply(elementName, attributeName, value); + } + + } } + /** An attribute policy that returns the value unchanged. */ - public static final AttributePolicy IDENTITY_ATTRIBUTE_POLICY - = new AttributePolicy() { + public static final AttributePolicy.V2 IDENTITY_ATTRIBUTE_POLICY + = new Util.AbstractV2AttributePolicy() { public String apply( - String elementName, String attributeName, String value) { + String elementName, String attributeName, String value, + Context context) { return value; } }; /** An attribute policy that rejects all values. */ - public static final AttributePolicy REJECT_ALL_ATTRIBUTE_POLICY - = new AttributePolicy() { + public static final AttributePolicy.V2 REJECT_ALL_ATTRIBUTE_POLICY + = new Util.AbstractV2AttributePolicy() { public @Nullable String apply( - String elementName, String attributeName, String value) { + String elementName, String attributeName, String value, + Context context) { return null; } }; /** An attribute policy that is joinable. */ static interface JoinableAttributePolicy - extends AttributePolicy, Joinable { + extends AttributePolicy.V2, Joinable { // Parameterized Appropriately. } } diff --git a/src/main/java/org/owasp/html/Context.java b/src/main/java/org/owasp/html/Context.java new file mode 100644 index 00000000..c1b3e9ee --- /dev/null +++ b/src/main/java/org/owasp/html/Context.java @@ -0,0 +1,55 @@ +// Copyright (c) 2017, Mike Samuel +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// Neither the name of the OWASP nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +package org.owasp.html; + +import org.owasp.url.UrlContext; + +/** + * The context in which the sanitized output will be used. + */ +public final class Context { + private final UrlContext urlContext; + + /** A least common denominator context. */ + public static final Context DEFAULT = new Context(UrlContext.DEFAULT); + + /** + * @param urlContext The URL context for the embedding document. + */ + public Context(UrlContext urlContext) { + this.urlContext = urlContext; + } + + /** + * The URL context for the embedding document. + */ + public UrlContext urlContext() { + return urlContext; + } +} diff --git a/src/main/java/org/owasp/html/ElementAndAttributePolicies.java b/src/main/java/org/owasp/html/ElementAndAttributePolicies.java index e745473d..a5d45576 100644 --- a/src/main/java/org/owasp/html/ElementAndAttributePolicies.java +++ b/src/main/java/org/owasp/html/ElementAndAttributePolicies.java @@ -41,14 +41,14 @@ @Immutable final class ElementAndAttributePolicies { final String elementName; - final ElementPolicy elPolicy; - final ImmutableMap attrPolicies; + final ElementPolicy.V2 elPolicy; + final ImmutableMap attrPolicies; final boolean skipIfEmpty; ElementAndAttributePolicies( String elementName, - ElementPolicy elPolicy, - Map + ElementPolicy.V2 elPolicy, + Map attrPolicies, boolean skipIfEmpty) { this.elementName = elementName; @@ -60,18 +60,19 @@ final class ElementAndAttributePolicies { ElementAndAttributePolicies and(ElementAndAttributePolicies p) { assert elementName.equals(p.elementName): elementName + " != " + p.elementName; - ImmutableMap.Builder joinedAttrPolicies + ImmutableMap.Builder joinedAttrPolicies = ImmutableMap.builder(); - for (Map.Entry e : this.attrPolicies.entrySet()) { + for (Map.Entry e + : this.attrPolicies.entrySet()) { String attrName = e.getKey(); - AttributePolicy a = e.getValue(); - AttributePolicy b = p.attrPolicies.get(attrName); + AttributePolicy.V2 a = e.getValue(); + AttributePolicy.V2 b = p.attrPolicies.get(attrName); if (b != null) { a = AttributePolicy.Util.join(a, b); } joinedAttrPolicies.put(attrName, a); } - for (Map.Entry e : p.attrPolicies.entrySet()) { + for (Map.Entry e : p.attrPolicies.entrySet()) { String attrName = e.getKey(); if (!this.attrPolicies.containsKey(attrName)) { joinedAttrPolicies.put(attrName, e.getValue()); @@ -99,15 +100,16 @@ ElementAndAttributePolicies and(ElementAndAttributePolicies p) { } ElementAndAttributePolicies andGlobals( - Map globalAttrPolicies) { + Map globalAttrPolicies) { if (globalAttrPolicies.isEmpty()) { return this; } - Map anded = null; - for (Map.Entry e : this.attrPolicies.entrySet()) { + Map anded = null; + for (Map.Entry e + : this.attrPolicies.entrySet()) { String attrName = e.getKey(); - AttributePolicy globalAttrPolicy = globalAttrPolicies.get(attrName); + AttributePolicy.V2 globalAttrPolicy = globalAttrPolicies.get(attrName); if (globalAttrPolicy != null) { - AttributePolicy attrPolicy = e.getValue(); - AttributePolicy joined = AttributePolicy.Util.join( + AttributePolicy.V2 attrPolicy = e.getValue(); + AttributePolicy.V2 joined = AttributePolicy.Util.join( attrPolicy, globalAttrPolicy); if (!joined.equals(attrPolicy)) { if (anded == null) { @@ -118,7 +120,8 @@ ElementAndAttributePolicies andGlobals( } } } - for (Map.Entry e : globalAttrPolicies.entrySet()) { + for (Map.Entry e + : globalAttrPolicies.entrySet()) { String attrName = e.getKey(); if (!this.attrPolicies.containsKey(attrName)) { if (anded == null) { diff --git a/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java index c643b734..65b1d40c 100644 --- a/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java +++ b/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java @@ -48,6 +48,7 @@ class ElementAndAttributePolicyBasedSanitizerPolicy final ImmutableMap elAndAttrPolicies; final ImmutableSet allowedTextContainers; private final HtmlStreamEventReceiver out; + private final Context context; /** * True to skip textual content. Used to ignore the content of embedded CDATA * content that is not meant to be human-readable. @@ -61,9 +62,11 @@ class ElementAndAttributePolicyBasedSanitizerPolicy ElementAndAttributePolicyBasedSanitizerPolicy( HtmlStreamEventReceiver out, + Context context, ImmutableMap elAndAttrPolicies, ImmutableSet allowedTextContainers) { this.out = out; + this.context = context; this.elAndAttrPolicies = elAndAttrPolicies; this.allowedTextContainers = allowedTextContainers; } @@ -102,7 +105,8 @@ public void openTag(String elementName, List attrs) { // to refactor it into multiple method bodies, so if you change this, // check the override of it in that class. ElementAndAttributePolicies policies = elAndAttrPolicies.get(elementName); - String adjustedElementName = applyPolicies(elementName, attrs, policies); + String adjustedElementName = applyPolicies( + elementName, attrs, context, policies); if (adjustedElementName != null && !(attrs.isEmpty() && policies.skipIfEmpty)) { writeOpenTag(policies, adjustedElementName, attrs); @@ -112,14 +116,14 @@ public void openTag(String elementName, List attrs) { } static final @Nullable String applyPolicies( - String elementName, List attrs, + String elementName, List attrs, Context context, ElementAndAttributePolicies policies) { String adjustedElementName; if (policies != null) { for (ListIterator attrsIt = attrs.listIterator(); attrsIt.hasNext();) { String name = attrsIt.next(); - AttributePolicy attrPolicy + AttributePolicy.V2 attrPolicy = policies.attrPolicies.get(name); if (attrPolicy == null) { attrsIt.remove(); @@ -127,7 +131,8 @@ public void openTag(String elementName, List attrs) { attrsIt.remove(); } else { String value = attrsIt.next(); - String adjustedValue = attrPolicy.apply(elementName, name, value); + String adjustedValue = attrPolicy.apply( + elementName, name, value, context); if (adjustedValue == null) { attrsIt.remove(); attrsIt.previous(); @@ -142,7 +147,8 @@ public void openTag(String elementName, List attrs) { // are unique. removeDuplicateAttributes(attrs); - adjustedElementName = policies.elPolicy.apply(elementName, attrs); + adjustedElementName = policies.elPolicy.apply( + elementName, attrs, context); if (adjustedElementName != null) { adjustedElementName = HtmlLexer.canonicalName(adjustedElementName); } diff --git a/src/main/java/org/owasp/html/ElementPolicy.java b/src/main/java/org/owasp/html/ElementPolicy.java index 689dec6b..a2cbb3a0 100644 --- a/src/main/java/org/owasp/html/ElementPolicy.java +++ b/src/main/java/org/owasp/html/ElementPolicy.java @@ -58,39 +58,69 @@ * careful to remove both the name and its associated value. * * @return {@code null} to disallow the element, or the adjusted element name. + * @deprecated prefer {@link V2#apply(String, List, Context)} */ + @Deprecated public @Nullable String apply(String elementName, List attrs); + /** + * Extends ElementPolicy that additionally receives the context in which + * the sanitized HTML is embedded. + */ + public interface V2 extends ElementPolicy { + /** + * @param elementName the lower-case element name. + * @param attrs a list of alternating attribute names and values. + * The list may be added to or removed from. When removing, be + * careful to remove both the name and its associated value. + * @param context the context in which the sanitized result will be used. + * + * @return {@code null} to disallow the element, or the adjusted element + * name. + */ + public @Nullable String apply( + String elementName, List attrs, Context context); + } + + /** Utilities for working with element policies. */ public static final class Util { private Util() { /* uninstantiable */ } + /** Adapts an old-style element policy to the new form. */ + public static ElementPolicy.V2 adapt(ElementPolicy p) { + if (p instanceof ElementPolicy.V2) { + return (V2) p; + } + return new ElementPolicyAdapter(p); + } + /** * Given zero or more element policies, returns an element policy equivalent * to applying them in order failing early if any of them fails. */ - public static final ElementPolicy join(ElementPolicy... policies) { + public static final ElementPolicy.V2 join(ElementPolicy... policies) { PolicyJoiner joiner = new PolicyJoiner(); for (ElementPolicy p : policies) { if (p != null) { - joiner.unroll(p); + joiner.unroll(adapt(p)); } } return joiner.join(); } static final class PolicyJoiner - extends JoinHelper { + extends JoinHelper { PolicyJoiner() { super( - ElementPolicy.class, JoinableElementPolicy.class, + ElementPolicy.V2.class, JoinableElementPolicy.class, REJECT_ALL_ELEMENT_POLICY, IDENTITY_ELEMENT_POLICY); } @Override - Optional> split(ElementPolicy x) { + Optional> split(ElementPolicy.V2 x) { if (x instanceof JoinedElementPolicy) { return Optional.of(((JoinedElementPolicy) x).policies); } @@ -98,47 +128,73 @@ Optional> split(ElementPolicy x) { } @Override - ElementPolicy rejoin(Set xs) { + ElementPolicy.V2 rejoin(Set xs) { return new JoinedElementPolicy(xs); } } + + + static abstract class AbstractV2ElementPolicy implements ElementPolicy.V2 { + public String apply(String elementName, List attrs) { + return apply(elementName, attrs, null); + } + } + + static final class ElementPolicyAdapter extends AbstractV2ElementPolicy { + final ElementPolicy p; + + public ElementPolicyAdapter(ElementPolicy p) { + this.p = p; + } + + public String apply( + String elementName, List attrs, Context context) { + return p.apply(elementName, attrs); + } + } + } /** An element policy that returns the element unchanged. */ - public static final ElementPolicy IDENTITY_ELEMENT_POLICY - = new ElementPolicy() { - public String apply(String elementName, List attrs) { + public static final ElementPolicy.V2 IDENTITY_ELEMENT_POLICY + = new Util.AbstractV2ElementPolicy() { + public String apply( + String elementName, List attrs, Context context) { return elementName; } }; /** An element policy that rejects all elements. */ - public static final ElementPolicy REJECT_ALL_ELEMENT_POLICY - = new ElementPolicy() { - public @Nullable String apply(String elementName, List attrs) { + public static final ElementPolicy.V2 REJECT_ALL_ELEMENT_POLICY + = new Util.AbstractV2ElementPolicy() { + + public @Nullable String apply( + String elementName, List attrs, Context context) { return null; } }; @SuppressWarnings("javadoc") static interface JoinableElementPolicy - extends ElementPolicy, Joinable { + extends ElementPolicy.V2, Joinable { // Parameterized appropriately. } } @Immutable -final class JoinedElementPolicy implements ElementPolicy { - final ImmutableList policies; +final class JoinedElementPolicy +extends ElementPolicy.Util.AbstractV2ElementPolicy { + final ImmutableList policies; - JoinedElementPolicy(Iterable policies) { + JoinedElementPolicy(Iterable policies) { this.policies = ImmutableList.copyOf(policies); } - public @Nullable String apply(String elementName, List attrs) { + public @Nullable String apply( + String elementName, List attrs, Context context) { String filteredElementName = elementName; - for (ElementPolicy part : policies) { - filteredElementName = part.apply(filteredElementName, attrs); + for (ElementPolicy.V2 part : policies) { + filteredElementName = part.apply(filteredElementName, attrs, context); if (filteredElementName == null) { break; } } return filteredElementName; diff --git a/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java b/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java index 54dd10ae..9702b46a 100644 --- a/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java +++ b/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java @@ -55,7 +55,8 @@ * @author Mike Samuel (mikesamuel@gmail.com) */ @TCB -public class FilterUrlByProtocolAttributePolicy implements AttributePolicy { +public class FilterUrlByProtocolAttributePolicy +extends AttributePolicy.Util.AbstractV2AttributePolicy { private final ImmutableSet protocols; /** @@ -67,7 +68,8 @@ public FilterUrlByProtocolAttributePolicy( } public @Nullable String apply( - String elementName, String attributeName, String value) { + String elementName, String attributeName, String value, + Context context) { String url = Strings.stripHtmlSpaces(value); protocol_loop: for (int i = 0, n = url.length(); i < n; ++i) { diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java index 93df04f8..6a88c20c 100644 --- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java +++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java @@ -37,8 +37,12 @@ import javax.annotation.concurrent.NotThreadSafe; import org.owasp.html.ElementPolicy.JoinableElementPolicy; +import org.owasp.url.Classification; +import org.owasp.url.Diagnostic; +import org.owasp.url.UrlClassifier; +import org.owasp.url.UrlContext; +import org.owasp.url.UrlValue; -import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -184,10 +188,11 @@ public class HtmlPolicyBuilder { static final String DEFAULT_RELS_ON_TARGETTED_LINKS_STR = Joiner.on(' ').join(DEFAULT_RELS_ON_TARGETTED_LINKS); - private final Map elPolicies = Maps.newLinkedHashMap(); - private final Map> attrPolicies + private final Map elPolicies = Maps.newLinkedHashMap(); - private final Map globalAttrPolicies + private final Map> attrPolicies + = Maps.newLinkedHashMap(); + private final Map globalAttrPolicies = Maps.newLinkedHashMap(); private final Set allowedProtocols = Sets.newLinkedHashSet(); private final Set skipIfEmpty = Sets.newLinkedHashSet( @@ -231,7 +236,7 @@ public HtmlPolicyBuilder allowElements( invalidateCompiledState(); for (String elementName : elementNames) { elementName = HtmlLexer.canonicalName(elementName); - ElementPolicy newPolicy = ElementPolicy.Util.join( + ElementPolicy.V2 newPolicy = ElementPolicy.Util.join( elPolicies.get(elementName), policy); // Don't remove if newPolicy is the always reject policy since we want // that to infect later allowElement calls for this particular element @@ -382,7 +387,8 @@ private HtmlPolicyBuilder allowAttributesOnElements( List elementNames) { invalidateCompiledState(); for (String elementName : elementNames) { - Map policies = attrPolicies.get(elementName); + Map policies = + attrPolicies.get(elementName); if (policies == null) { policies = Maps.newLinkedHashMap(); attrPolicies.put(elementName, policies); @@ -437,8 +443,9 @@ public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) { } /** - * Opts out of some of the {@link #DEFAULT_RELS_ON_TARGETTED_LINKS} from being added - * to links, and reverses pre + * Opts out of some of the {@link #DEFAULT_RELS_ON_TARGETTED_LINKS} from + * being added to links, and reverses previous calls to + * {@link #requireRelsOnLinks(String...)} with the same arguments. * * @see #requireRelsOnLinks */ @@ -472,7 +479,14 @@ public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) { *

* Do not allow any *script such as javascript * protocols if you might use this policy with untrusted code. + * + *

This does not affect URLs vetted by + * {@link AttributeBuilder#matching(UrlClassifier)} since + * those classifiers whitelist protocols. + * + * @deprecated Prefer {@link AttributeBuilder#matching(UrlClassifier)} */ + @Deprecated public HtmlPolicyBuilder allowUrlProtocols(String... protocols) { invalidateCompiledState(); // If there is at least one allowed protocol, then allow URLs and @@ -489,7 +503,14 @@ public HtmlPolicyBuilder allowUrlProtocols(String... protocols) { /** * Reverses a decision made by {@link #allowUrlProtocols}. + * + *

This does not affect URLs vetted by + * {@link AttributeBuilder#matching(UrlClassifier)} since + * those classifiers whitelist protocols. + * + * @deprecated Prefer {@link AttributeBuilder#matching(UrlClassifier)} */ + @Deprecated public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) { invalidateCompiledState(); for (String protocol : protocols) { @@ -602,8 +623,43 @@ public HtmlPolicyBuilder withPostprocessor(HtmlStreamEventProcessor pp) { * previous calls to this object. * Typically a {@link HtmlStreamRenderer}. */ + @Deprecated public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) { - return toFactory().apply(out); + return build(out, Context.DEFAULT, null, null); + } + + /** + * Produces a policy based on the allow and disallow calls previously made. + * + * @param out receives calls to open only tags allowed by + * previous calls to this object. + * Typically a {@link HtmlStreamRenderer}. + * @param listener is notified of dropped tags and attributes so that + * intrusion detection systems can be alerted to questionable HTML. + * If {@code null} then no notifications are sent. + * @param listenerContext if {@code (listener != null)} then the context + * value passed with alerts. This can be used to let the listener + * know from which connection or request the questionable HTML was + * received. + */ + @Deprecated + public HtmlSanitizer.Policy build( + HtmlStreamEventReceiver out, + @Nullable HtmlChangeListener listener, + @Nullable CTX listenerContext) { + return build(out, Context.DEFAULT, listener, listenerContext); + } + + /** + * Produces a policy based on the allow and disallow calls previously made. + * + * @param out receives calls to open only tags allowed by + * previous calls to this object. + * Typically a {@link HtmlStreamRenderer}. + */ + public HtmlSanitizer.Policy build( + HtmlStreamEventReceiver out, Context context) { + return toFactory().apply(out, context, null, null); } /** @@ -615,15 +671,17 @@ public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) { * @param listener is notified of dropped tags and attributes so that * intrusion detection systems can be alerted to questionable HTML. * If {@code null} then no notifications are sent. - * @param context if {@code (listener != null)} then the context value passed - * with alerts. This can be used to let the listener know from which - * connection or request the questionable HTML was received. + * @param listenerContext if {@code (listener != null)} then the context + * value passed with alerts. This can be used to let the listener + * know from which connection or request the questionable HTML was + * received. */ public HtmlSanitizer.Policy build( HtmlStreamEventReceiver out, + Context context, @Nullable HtmlChangeListener listener, - @Nullable CTX context) { - return toFactory().apply(out, listener, context); + @Nullable CTX listenerContext) { + return toFactory().apply(out, context, listener, listenerContext); } /** @@ -650,11 +708,11 @@ public PolicyFactory toFactory() { private transient CompiledState compiledState; private static final class CompiledState { - final Map globalAttrPolicies; + final Map globalAttrPolicies; final ImmutableMap compiledPolicies; CompiledState( - Map globalAttrPolicies, + Map globalAttrPolicies, ImmutableMap compiledPolicies) { this.globalAttrPolicies = globalAttrPolicies; this.compiledPolicies = compiledPolicies; @@ -671,17 +729,17 @@ private CompiledState compilePolicies() { // Copy maps before normalizing in case builder is reused. @SuppressWarnings("hiding") - Map elPolicies + Map elPolicies = Maps.newLinkedHashMap(this.elPolicies); @SuppressWarnings("hiding") - Map> attrPolicies + Map> attrPolicies = Maps.newLinkedHashMap(this.attrPolicies); - for (Map.Entry> e : + for (Map.Entry> e : attrPolicies.entrySet()) { e.setValue(Maps.newLinkedHashMap(e.getValue())); } @SuppressWarnings("hiding") - Map globalAttrPolicies + Map globalAttrPolicies = Maps.newLinkedHashMap(this.globalAttrPolicies); @SuppressWarnings("hiding") Set allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols); @@ -711,7 +769,7 @@ private CompiledState compilePolicies() { // attribute globally. StylingPolicy stylingPolicy = null; { - final AttributePolicy urlAttributePolicy; + final AttributePolicy.V2 urlAttributePolicy; if (allowedProtocols.size() == 3 && allowedProtocols.contains("mailto") && allowedProtocols.contains("http") @@ -723,13 +781,13 @@ private CompiledState compilePolicies() { } if (this.stylingPolicySchema != null) { - final AttributePolicy styleUrlPolicyFinal = AttributePolicy.Util.join( - styleUrlPolicy, urlAttributePolicy); + final AttributePolicy.V2 styleUrlPolicyFinal = + AttributePolicy.Util.join(styleUrlPolicy, urlAttributePolicy); stylingPolicy = new StylingPolicy( stylingPolicySchema, - new Function() { - public String apply(String url) { - return styleUrlPolicyFinal.apply("img", "src", url); + new UrlRewriter() { + public String rewrite(String url, Context context) { + return styleUrlPolicyFinal.apply("img", "src", url, context); } }); } @@ -738,19 +796,26 @@ public String apply(String url) { for (String urlAttributeName : URL_ATTRIBUTE_NAMES) { if (globalAttrPolicies.containsKey(urlAttributeName)) { toGuard.remove(urlAttributeName); - globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join( - urlAttributePolicy, globalAttrPolicies.get(urlAttributeName))); + AttributePolicy.V2 unguarded = + globalAttrPolicies.get(urlAttributeName); + if (!isGuardedByUrlClassifier(unguarded)) { + globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join( + urlAttributePolicy, unguarded)); + } } } // Implement guards not implemented on global policies in the per-element // policy maps. - for (Map.Entry> e + for (Map.Entry> e : attrPolicies.entrySet()) { - Map policies = e.getValue(); + Map policies = e.getValue(); for (String urlAttributeName : toGuard) { if (policies.containsKey(urlAttributeName)) { - policies.put(urlAttributeName, AttributePolicy.Util.join( - urlAttributePolicy, policies.get(urlAttributeName))); + AttributePolicy.V2 unguarded = policies.get(urlAttributeName); + if (!isGuardedByUrlClassifier(unguarded)) { + policies.put(urlAttributeName, AttributePolicy.Util.join( + urlAttributePolicy, unguarded)); + } } } } @@ -765,20 +830,20 @@ public String apply(String url) { ImmutableMap.Builder policiesBuilder = ImmutableMap.builder(); - for (Map.Entry e : elPolicies.entrySet()) { + for (Map.Entry e : elPolicies.entrySet()) { String elementName = e.getKey(); - ElementPolicy elPolicy = e.getValue(); + ElementPolicy.V2 elPolicy = e.getValue(); if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) { continue; } - Map elAttrPolicies + Map elAttrPolicies = attrPolicies.get(elementName); if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); } if (stylingPolicy != null) { - AttributePolicy old = elAttrPolicies.get("style"); + AttributePolicy.V2 old = elAttrPolicies.get("style"); if (old != null) { attrPolicies.put( elementName, @@ -788,23 +853,24 @@ public String apply(String url) { } } - ImmutableMap.Builder attrs + ImmutableMap.Builder attrs = ImmutableMap.builder(); - for (Map.Entry ape : elAttrPolicies.entrySet()) { + for (Map.Entry ape + : elAttrPolicies.entrySet()) { String attributeName = ape.getKey(); // Handle below so we don't end up putting the same key into the map // twice. ImmutableMap.Builder hates that. if (globalAttrPolicies.containsKey(attributeName)) { continue; } - AttributePolicy policy = ape.getValue(); + AttributePolicy.V2 policy = ape.getValue(); if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) { attrs.put(attributeName, policy); } } - for (Map.Entry ape + for (Map.Entry ape : globalAttrPolicies.entrySet()) { String attributeName = ape.getKey(); - AttributePolicy policy = AttributePolicy.Util.join( + AttributePolicy.V2 policy = AttributePolicy.Util.join( elAttrPolicies.get(attributeName), ape.getValue()); if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) { attrs.put(attributeName, policy); @@ -822,6 +888,15 @@ public String apply(String url) { return compiledState; } + private static boolean isGuardedByUrlClassifier(AttributePolicy.V2 p) { + for (AttributePolicy.V2 ep : AttributePolicy.Util.unjoin(p)) { + if (ep instanceof UrlClassifierAttributePolicy) { + return true; + } + } + return false; + } + /** * Builds the relationship between attributes, the values that they may have, * and the elements on which they may appear. @@ -848,6 +923,22 @@ public AttributeBuilder matching(AttributePolicy attrPolicy) { return this; } + /** + * Filters out values which are not valid URLs that match the given + * classifier. + * + *

When this is provided, the + * {@linkplain HtmlPolicyBuilder#allowUrlProtocols allowed URL protocols} + * and {@linkplain HtmlPolicyBuilder#disallowUrlProtocols + * disallowed URL protocols} + * are not applied in preference of the + * {@linkplain org.owasp.url.UrlClassifierBuilder#scheme scheme whitelist} + * implicit in the classifier. + */ + public AttributeBuilder matching(UrlClassifier classifier) { + return matching(new UrlClassifierAttributePolicy(classifier)); + } + /** * Restrict the values allowed by later {@code allow*} calls to those * matching the pattern. @@ -945,6 +1036,7 @@ public HtmlPolicyBuilder onElements(String... elementNames) { private static final class RelsOnLinksPolicy + extends ElementPolicy.Util.AbstractV2ElementPolicy implements ElementPolicy.JoinableElementPolicy { final ImmutableSet extra; final ImmutableSet skip; @@ -982,7 +1074,8 @@ private static int indexOfAttributeValue( return -1; } - public String apply(String elementName, List attrs) { + public String apply( + String elementName, List attrs, Context context) { if (indexOfAttributeValue("href", attrs) >= 0) { // It's a link. boolean hasTarget = indexOfAttributeValue("target", attrs) >= 0; @@ -1042,5 +1135,40 @@ public JoinableElementPolicy join( return RelsOnLinksPolicy.create(extra, skip); } } + + static final class UrlClassifierAttributePolicy + extends AttributePolicy.Util.AbstractV2AttributePolicy { + final UrlClassifier urlClassifier; + + UrlClassifierAttributePolicy(UrlClassifier urlClassifier) { + this.urlClassifier = urlClassifier; + } + + public String apply( + String elementName, String attributeName, String value, + Context context) { + if (value == null) { + return null; + } + UrlContext urlContext = context != null + ? context.urlContext() + : UrlContext.DEFAULT; + UrlValue urlValue = UrlValue.from(urlContext, value.trim()); + Classification classification = urlClassifier.apply( + urlValue, Diagnostic.Receiver.NULL); + switch (classification) { + case INVALID: + case NOT_A_MATCH: + return null; + case MATCH: + if (!urlValue.inheritsPlaceholderAuthority + && !urlValue.cornerCases.isEmpty()) { + return urlValue.urlText; + } + return urlValue.originalUrlText; + } + throw new AssertionError(classification); + } + } } diff --git a/src/main/java/org/owasp/html/HtmlSanitizer.java b/src/main/java/org/owasp/html/HtmlSanitizer.java index 3a9493b4..2c024464 100644 --- a/src/main/java/org/owasp/html/HtmlSanitizer.java +++ b/src/main/java/org/owasp/html/HtmlSanitizer.java @@ -129,13 +129,13 @@ public static void sanitize( public static void sanitize( @Nullable String html, final Policy policy, HtmlStreamEventProcessor preprocessor) { - if (html == null) { html = ""; } + String htmlText = html != null ? html : ""; HtmlStreamEventReceiver receiver = initializePolicy(policy, preprocessor); receiver.openDocument(); - HtmlLexer lexer = new HtmlLexer(html); + HtmlLexer lexer = new HtmlLexer(htmlText); // Use a linked list so that policies can use Iterator.remove() in an O(1) // way. LinkedList attrs = Lists.newLinkedList(); @@ -144,16 +144,16 @@ public static void sanitize( switch (token.type) { case TEXT: receiver.text( - Encoding.decodeHtml(html.substring(token.start, token.end))); + Encoding.decodeHtml(htmlText.substring(token.start, token.end))); break; case UNESCAPED: receiver.text(Encoding.stripBannedCodeunits( - html.substring(token.start, token.end))); + htmlText.substring(token.start, token.end))); break; case TAGBEGIN: - if (html.charAt(token.start + 1) == '/') { // A close tag. + if (htmlText.charAt(token.start + 1) == '/') { // A close tag. receiver.closeTag(HtmlLexer.canonicalName( - html.substring(token.start + 2, token.end))); + htmlText.substring(token.start + 2, token.end))); while (lexer.hasNext() && lexer.next().type != HtmlTokenType.TAGEND) { // skip tokens until we see a ">" @@ -173,12 +173,12 @@ public static void sanitize( } else { attrsReadyForName = false; } - attrs.add(HtmlLexer.canonicalName( - html.substring(tagBodyToken.start, tagBodyToken.end))); + attrs.add(HtmlLexer.canonicalName(htmlText.substring( + tagBodyToken.start, tagBodyToken.end))); break; case ATTRVALUE: - attrs.add(Encoding.decodeHtml(stripQuotes( - html.substring(tagBodyToken.start, tagBodyToken.end)))); + attrs.add(Encoding.decodeHtml(stripQuotes(htmlText.substring( + tagBodyToken.start, tagBodyToken.end)))); attrsReadyForName = true; break; case TAGEND: @@ -192,7 +192,7 @@ public static void sanitize( } receiver.openTag( HtmlLexer.canonicalName( - html.substring(token.start + 1, token.end)), + htmlText.substring(token.start + 1, token.end)), attrs); } break; diff --git a/src/main/java/org/owasp/html/HtmlStreamRenderer.java b/src/main/java/org/owasp/html/HtmlStreamRenderer.java index f8436f61..3ab4a4f1 100644 --- a/src/main/java/org/owasp/html/HtmlStreamRenderer.java +++ b/src/main/java/org/owasp/html/HtmlStreamRenderer.java @@ -75,8 +75,9 @@ public static HtmlStreamRenderer create( return new CloseableHtmlStreamRenderer( output, ioExHandler, badHtmlHandler); } else if (AutoCloseableHtmlStreamRenderer.isAutoCloseable(output)) { - return AutoCloseableHtmlStreamRenderer.createAutoCloseableHtmlStreamRenderer( - output, ioExHandler, badHtmlHandler); + return AutoCloseableHtmlStreamRenderer + .createAutoCloseableHtmlStreamRenderer( + output, ioExHandler, badHtmlHandler); } else { return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler); } diff --git a/src/main/java/org/owasp/html/JoinedAttributePolicy.java b/src/main/java/org/owasp/html/JoinedAttributePolicy.java index fc1b9195..eee8675d 100644 --- a/src/main/java/org/owasp/html/JoinedAttributePolicy.java +++ b/src/main/java/org/owasp/html/JoinedAttributePolicy.java @@ -37,19 +37,21 @@ @Immutable -final class JoinedAttributePolicy implements AttributePolicy { - final ImmutableList policies; +final class JoinedAttributePolicy +extends AttributePolicy.Util.AbstractV2AttributePolicy { + final ImmutableList policies; - JoinedAttributePolicy(Collection policies) { + JoinedAttributePolicy(Collection policies) { this.policies = ImmutableList.copyOf(policies); } public @Nullable String apply( - String elementName, String attributeName, @Nullable String rawValue) { + String elementName, String attributeName, @Nullable String rawValue, + Context context) { String value = rawValue; - for (AttributePolicy p : policies) { + for (AttributePolicy.V2 p : policies) { if (value == null) { break; } - value = p.apply(elementName, attributeName, value); + value = p.apply(elementName, attributeName, value, context); } return value; } diff --git a/src/main/java/org/owasp/html/PolicyFactory.java b/src/main/java/org/owasp/html/PolicyFactory.java index 1f09dba7..1ec20c75 100644 --- a/src/main/java/org/owasp/html/PolicyFactory.java +++ b/src/main/java/org/owasp/html/PolicyFactory.java @@ -54,7 +54,7 @@ public final class PolicyFactory implements Function { private final ImmutableMap policies; - private final ImmutableMap globalAttrPolicies; + private final ImmutableMap globalAttrPolicies; private final ImmutableSet textContainers; private final HtmlStreamEventProcessor preprocessor; private final HtmlStreamEventProcessor postprocessor; @@ -62,7 +62,7 @@ public final class PolicyFactory PolicyFactory( ImmutableMap policies, ImmutableSet textContainers, - ImmutableMap globalAttrPolicies, + ImmutableMap globalAttrPolicies, HtmlStreamEventProcessor preprocessor, HtmlStreamEventProcessor postprocessor) { this.policies = policies; @@ -73,9 +73,16 @@ public final class PolicyFactory } /** Produces a sanitizer that emits tokens to {@code out}. */ + @Deprecated public HtmlSanitizer.Policy apply(@Nonnull HtmlStreamEventReceiver out) { + return apply(out, Context.DEFAULT); + } + + /** Produces a sanitizer that emits tokens to {@code out}. */ + public HtmlSanitizer.Policy apply( + @Nonnull HtmlStreamEventReceiver out, Context context) { return new ElementAndAttributePolicyBasedSanitizerPolicy( - postprocessor.wrap(out), policies, textContainers); + postprocessor.wrap(out), context, policies, textContainers); } /** @@ -85,26 +92,34 @@ public HtmlSanitizer.Policy apply(@Nonnull HtmlStreamEventReceiver out) { * @param listener if non-null, receives notifications of tags and attributes * that were rejected by the policy. This may tie into intrusion * detection systems. - * @param context if {@code (listener != null)} then the context value passed - * with notifications. This can be used to let the listener know from - * which connection or request the questionable HTML was received. + * @param listenerContext if {@code (listener != null)} then the context + * value passed with notifications. This can be used to let the listener + * know from which connection or request the questionable HTML was + * received. */ public HtmlSanitizer.Policy apply( - HtmlStreamEventReceiver out, @Nullable HtmlChangeListener listener, - @Nullable CTX context) { + HtmlStreamEventReceiver out, Context context, + @Nullable HtmlChangeListener listener, + @Nullable CTX listenerContext) { if (listener == null) { - return apply(out); + return apply(out, context); } else { HtmlChangeReporter r = new HtmlChangeReporter( - out, listener, context); - r.setPolicy(apply(r.getWrappedRenderer())); + out, listener, listenerContext); + r.setPolicy(apply(r.getWrappedRenderer(), context)); return r.getWrappedPolicy(); } } /** A convenience function that sanitizes a string of HTML. */ + @Deprecated public String sanitize(@Nullable String html) { - return sanitize(html, null, null); + return sanitize(html, Context.DEFAULT, null, null); + } + + /** A convenience function that sanitizes a string of HTML. */ + public String sanitize(@Nullable String html, Context context) { + return sanitize(html, context, null, null); } /** @@ -114,22 +129,25 @@ public String sanitize(@Nullable String html) { * @param listener if non-null, receives notifications of tags and attributes * that were rejected by the policy. This may tie into intrusion * detection systems. - * @param context if {@code (listener != null)} then the context value passed - * with notifications. This can be used to let the listener know from + * @param listenerContext if {@code (listener != null)} then the context + * value passed with notifications. + * This can be used to let the listener know from * which connection or request the questionable HTML was received. * @return a string of HTML that complies with this factory's policy. */ public String sanitize( - @Nullable String html, - @Nullable HtmlChangeListener listener, @Nullable CTX context) { + @Nullable String html, Context context, + @Nullable HtmlChangeListener listener, + @Nullable CTX listenerContext) { if (html == null) { return ""; } StringBuilder out = new StringBuilder(html.length()); HtmlSanitizer.sanitize( html, apply( HtmlStreamRenderer.create(out, Handler.DO_NOTHING), + context, listener, - context), + listenerContext), preprocessor); return out.toString(); } @@ -178,14 +196,15 @@ public PolicyFactory and(PolicyFactory f) { .addAll(f.textContainers) .build(); } - ImmutableMap allGlobalAttrPolicies; + ImmutableMap allGlobalAttrPolicies; if (f.globalAttrPolicies.isEmpty()) { allGlobalAttrPolicies = this.globalAttrPolicies; } else if (this.globalAttrPolicies.isEmpty()) { allGlobalAttrPolicies = f.globalAttrPolicies; } else { - ImmutableMap.Builder ab = ImmutableMap.builder(); - for (Map.Entry e + ImmutableMap.Builder ab = + ImmutableMap.builder(); + for (Map.Entry e : this.globalAttrPolicies.entrySet()) { String attrName = e.getKey(); ab.put( @@ -193,7 +212,7 @@ public PolicyFactory and(PolicyFactory f) { AttributePolicy.Util.join( e.getValue(), f.globalAttrPolicies.get(attrName))); } - for (Map.Entry e + for (Map.Entry e : f.globalAttrPolicies.entrySet()) { String attrName = e.getKey(); if (!this.globalAttrPolicies.containsKey(attrName)) { diff --git a/src/main/java/org/owasp/html/Sanitizers.java b/src/main/java/org/owasp/html/Sanitizers.java index ed6f4d93..c5682a7e 100644 --- a/src/main/java/org/owasp/html/Sanitizers.java +++ b/src/main/java/org/owasp/html/Sanitizers.java @@ -117,7 +117,10 @@ public String apply( /** * Allows {@code } elements from HTTP, HTTPS, and relative sources. */ + @SuppressWarnings("deprecation") public static final PolicyFactory IMAGES = new HtmlPolicyBuilder() + // TODO: Use a UrlClassifier instead once we've ensured that it's + // compatible with all supported JDKs. .allowUrlProtocols("http", "https").allowElements("img") .allowAttributes("alt", "src").onElements("img") .allowAttributes("border", "height", "width").matching(INTEGER) diff --git a/src/main/java/org/owasp/html/StandardUrlAttributePolicy.java b/src/main/java/org/owasp/html/StandardUrlAttributePolicy.java index 3b057cf3..f6695728 100644 --- a/src/main/java/org/owasp/html/StandardUrlAttributePolicy.java +++ b/src/main/java/org/owasp/html/StandardUrlAttributePolicy.java @@ -33,14 +33,17 @@ * {@code http}, {@code https}, {@code mailto}. */ @TCB -final class StandardUrlAttributePolicy implements AttributePolicy { +final class StandardUrlAttributePolicy +extends AttributePolicy.Util.AbstractV2AttributePolicy { static final StandardUrlAttributePolicy INSTANCE = new StandardUrlAttributePolicy(); private StandardUrlAttributePolicy() { /* singleton */ } - public String apply(String elementName, String attributeName, String value) { + public String apply( + String elementName, String attributeName, String value, + Context context) { String url = Strings.stripHtmlSpaces(value); protocol_loop: diff --git a/src/main/java/org/owasp/html/StylingPolicy.java b/src/main/java/org/owasp/html/StylingPolicy.java index 370bf209..f35aa64b 100644 --- a/src/main/java/org/owasp/html/StylingPolicy.java +++ b/src/main/java/org/owasp/html/StylingPolicy.java @@ -35,8 +35,6 @@ import org.owasp.html.AttributePolicy.JoinableAttributePolicy; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.base.Functions; import com.google.common.collect.Lists; /** @@ -45,19 +43,23 @@ * ones to reduce the attack-surface. */ @TCB -final class StylingPolicy implements JoinableAttributePolicy { +final class StylingPolicy +extends AttributePolicy.Util.AbstractV2AttributePolicy +implements JoinableAttributePolicy { final CssSchema cssSchema; - final Function urlRewriter; + final UrlRewriter urlRewriter; - StylingPolicy(CssSchema cssSchema, Function urlRewriter) { + StylingPolicy( + CssSchema cssSchema, + UrlRewriter urlRewriter) { this.cssSchema = cssSchema; this.urlRewriter = urlRewriter; } public @Nullable String apply( - String elementName, String attributeName, String value) { - return value != null ? sanitizeCssProperties(value) : null; + String elementName, String attributeName, String value, Context context) { + return value != null ? sanitizeCssProperties(value, context) : null; } /** @@ -68,7 +70,7 @@ final class StylingPolicy implements JoinableAttributePolicy { * @return A sanitized version of the input. */ @VisibleForTesting - String sanitizeCssProperties(String style) { + String sanitizeCssProperties(String style, final Context context) { final StringBuilder sanitizedCss = new StringBuilder(); CssGrammar.parsePropertyGroup(style, new CssGrammar.PropertyHandler() { CssSchema.Property cssProperty = CssSchema.DISALLOWED; @@ -94,7 +96,7 @@ private void closeQuotedIdents() { private void sanitizeAndAppendUrl(String urlContent) { if (urlContent.length() < 1024) { - String rewrittenUrl = urlRewriter.apply(urlContent); + String rewrittenUrl = urlRewriter.rewrite(urlContent, context); if (rewrittenUrl != null && !rewrittenUrl.isEmpty()) { if (hasTokens) { sanitizedCss.append(' '); } sanitizedCss.append("url('").append(rewrittenUrl).append("')"); @@ -274,9 +276,9 @@ static final class StylingPolicyJoinStrategy public JoinableAttributePolicy join( Iterable toJoin) { - Function identity = Functions.identity(); + UrlRewriter identity = UrlRewriter.IDENTITY; CssSchema cssSchema = null; - Function urlRewriter = identity; + UrlRewriter urlRewriter = identity; for (JoinableAttributePolicy p : toJoin) { StylingPolicy sp = (StylingPolicy) p; cssSchema = cssSchema == null @@ -284,10 +286,21 @@ public JoinableAttributePolicy join( urlRewriter = urlRewriter.equals(identity) || urlRewriter.equals(sp.urlRewriter) ? sp.urlRewriter - : Functions.compose(urlRewriter, sp.urlRewriter); + : compose(urlRewriter, sp.urlRewriter); } return new StylingPolicy(cssSchema, urlRewriter); } } + + static UrlRewriter compose(final UrlRewriter a, final UrlRewriter b) { + return new UrlRewriter() { + + public String rewrite(String urlText, Context context) { + String rw = b.rewrite(urlText, context); + return rw != null ? a.rewrite(rw, context) : null; + } + + }; + } } diff --git a/src/main/java/org/owasp/html/UrlRewriter.java b/src/main/java/org/owasp/html/UrlRewriter.java new file mode 100644 index 00000000..b2f37333 --- /dev/null +++ b/src/main/java/org/owasp/html/UrlRewriter.java @@ -0,0 +1,49 @@ +// Copyright (c) 2017, Mike Samuel +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// Neither the name of the OWASP nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +package org.owasp.html; + +/** + * Maps URLs from untrusted text to trusted URLs. + */ +public interface UrlRewriter { + + /** + * @param context the context of the document that will embed the output. + * @return the trusted URL or null to not trust any URL. + */ + String rewrite(String urlText, Context context); + + /** Returns the input unfiltered. */ + public static UrlRewriter IDENTITY = new UrlRewriter() { + + public String rewrite(String urlText, Context context) { + return urlText; + } + }; +} diff --git a/src/main/java/org/owasp/html/examples/EbayPolicyExample.java b/src/main/java/org/owasp/html/examples/EbayPolicyExample.java index 2b15879a..d204e0fa 100644 --- a/src/main/java/org/owasp/html/examples/EbayPolicyExample.java +++ b/src/main/java/org/owasp/html/examples/EbayPolicyExample.java @@ -32,6 +32,7 @@ import java.io.InputStreamReader; import java.util.regex.Pattern; +import org.owasp.html.Context; import org.owasp.html.Handler; import org.owasp.html.HtmlPolicyBuilder; import org.owasp.html.HtmlSanitizer; @@ -228,7 +229,9 @@ public void handle(String x) { } }); // Use the policy defined above to sanitize the HTML. - HtmlSanitizer.sanitize(html, POLICY_DEFINITION.apply(renderer)); + HtmlSanitizer.sanitize( + html, + POLICY_DEFINITION.apply(renderer, Context.DEFAULT)); } private static Predicate matchesEither( diff --git a/src/main/java/org/owasp/html/examples/UrlTextExample.java b/src/main/java/org/owasp/html/examples/UrlTextExample.java index 357667bd..eb3c16b3 100644 --- a/src/main/java/org/owasp/html/examples/UrlTextExample.java +++ b/src/main/java/org/owasp/html/examples/UrlTextExample.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.List; +import org.owasp.html.Context; import org.owasp.html.HtmlPolicyBuilder; import org.owasp.html.HtmlStreamEventReceiver; import org.owasp.html.HtmlStreamEventReceiverWrapper; @@ -134,7 +135,9 @@ public HtmlStreamEventReceiver wrap(HtmlStreamEventReceiver sink) { } ).toFactory(); - out.append(policyBuilder.sanitize(Joiner.on('\n').join(inputs))); + out.append(policyBuilder.sanitize( + Joiner.on('\n').join(inputs), + Context.DEFAULT)); } /** diff --git a/src/test/java/org/owasp/html/AntiSamyTest.java b/src/test/java/org/owasp/html/AntiSamyTest.java index 3a09d02e..7908504a 100644 --- a/src/test/java/org/owasp/html/AntiSamyTest.java +++ b/src/test/java/org/owasp/html/AntiSamyTest.java @@ -82,7 +82,7 @@ public String apply( .allowStandardUrlProtocols() .requireRelNofollowOnLinks() .allowStyling() - .build(renderer); + .build(renderer, Context.DEFAULT); } static String sanitize(String html) { diff --git a/src/test/java/org/owasp/html/Benchmark.java b/src/test/java/org/owasp/html/Benchmark.java index af3e8613..08a9a460 100644 --- a/src/test/java/org/owasp/html/Benchmark.java +++ b/src/test/java/org/owasp/html/Benchmark.java @@ -213,7 +213,8 @@ public void handle(String x) { } }); - HtmlSanitizer.sanitize(html, policyBuilder.build(renderer)); + HtmlSanitizer.sanitize( + html, policyBuilder.build(renderer, Context.DEFAULT)); return sb.toString(); } diff --git a/src/test/java/org/owasp/html/ElementPolicyTest.java b/src/test/java/org/owasp/html/ElementPolicyTest.java index 2ebe6ad8..fcac8331 100644 --- a/src/test/java/org/owasp/html/ElementPolicyTest.java +++ b/src/test/java/org/owasp/html/ElementPolicyTest.java @@ -19,7 +19,8 @@ @SuppressWarnings("javadoc") public final class ElementPolicyTest extends TestCase { - static class HasCharElementPolicy implements ElementPolicy { + static final class HasCharElementPolicy + extends ElementPolicy.Util.AbstractV2ElementPolicy { final char ch; HasCharElementPolicy(char ch) { @@ -27,7 +28,7 @@ static class HasCharElementPolicy implements ElementPolicy { } public @Nullable - String apply(String elementName, List attrs) { + String apply(String elementName, List attrs, Context context) { attrs.clear(); return elementName.indexOf(ch) >= 0 ? elementName : null; } @@ -38,11 +39,11 @@ public String toString() { } } - private static void assertPassed(ElementPolicy p, String... expected) { + private static void assertPassed(ElementPolicy.V2 p, String... expected) { List attrs = Lists.newArrayList(); ImmutableList.Builder actual = ImmutableList.builder(); for (String elName : TEST_EL_NAMES) { - if (p.apply(elName, attrs) != null) { + if (p.apply(elName, attrs, Context.DEFAULT) != null) { actual.add(elName); } } @@ -54,18 +55,18 @@ private static void assertPassed(ElementPolicy p, String... expected) { @Test public static final void testJoin() { - ElementPolicy a = new HasCharElementPolicy('a'); - ElementPolicy b = new HasCharElementPolicy('b'); - ElementPolicy c = new HasCharElementPolicy('c'); - ElementPolicy d = new HasCharElementPolicy('d'); + ElementPolicy.V2 a = new HasCharElementPolicy('a'); + ElementPolicy.V2 b = new HasCharElementPolicy('b'); + ElementPolicy.V2 c = new HasCharElementPolicy('c'); + ElementPolicy.V2 d = new HasCharElementPolicy('d'); assertPassed(REJECT_ALL_ELEMENT_POLICY); assertPassed(IDENTITY_ELEMENT_POLICY, TEST_EL_NAMES.toArray(new String[0])); assertPassed(a, "abacus", "abracadabra", "bar", "far", "cadr"); assertPassed(c, "abacus", "abracadabra", "cadr", "cdr"); assertPassed(d, "abracadabra", "cadr", "cdr"); - ElementPolicy a_b = join(a, b); - ElementPolicy b_a = join(b, a); + ElementPolicy.V2 a_b = join(a, b); + ElementPolicy.V2 b_a = join(b, a); assertPassed(a_b, "abacus", "abracadabra", "bar"); assertPassed(b_a, "abacus", "abracadabra", "bar"); assertPassed(join(b_a, b_a), "abacus", "abracadabra", "bar"); diff --git a/src/test/java/org/owasp/html/ExamplesTest.java b/src/test/java/org/owasp/html/ExamplesTest.java index 8d438311..2a7d907d 100644 --- a/src/test/java/org/owasp/html/ExamplesTest.java +++ b/src/test/java/org/owasp/html/ExamplesTest.java @@ -41,7 +41,7 @@ import junit.framework.TestCase; -@SuppressWarnings("javadoc") +@SuppressWarnings({"javadoc", "deprecation"}) public class ExamplesTest extends TestCase { @Test public static final void testExamplesRun() throws Exception { diff --git a/src/test/java/org/owasp/html/HtmlChangeReporterTest.java b/src/test/java/org/owasp/html/HtmlChangeReporterTest.java index 9c8ad7d5..be5f96dc 100644 --- a/src/test/java/org/owasp/html/HtmlChangeReporterTest.java +++ b/src/test/java/org/owasp/html/HtmlChangeReporterTest.java @@ -35,26 +35,26 @@ @SuppressWarnings("javadoc") public class HtmlChangeReporterTest extends TestCase { - static class Context { + static class TestContext { // Opaque test value compared via equality. } @Test public static final void testChangeReporting() { - final Context testContext = new Context(); + final TestContext testContext = new TestContext(); StringBuilder out = new StringBuilder(); final StringBuilder log = new StringBuilder(); HtmlStreamRenderer renderer = HtmlStreamRenderer.create( out, Handler.DO_NOTHING); - HtmlChangeListener listener = new HtmlChangeListener() { - public void discardedTag(Context context, String elementName) { - assertSame(testContext, context); + HtmlChangeListener listener = new HtmlChangeListener() { + public void discardedTag(TestContext tcontext, String elementName) { + assertSame(testContext, tcontext); log.append('<').append(elementName).append("> "); } public void discardedAttributes( - Context context, String tagName, String... attributeNames) { + TestContext context, String tagName, String... attributeNames) { assertSame(testContext, context); log.append('<').append(tagName); for (String attributeName : attributeNames) { @@ -63,10 +63,11 @@ public void discardedAttributes( log.append("> "); } }; - HtmlChangeReporter hcr = new HtmlChangeReporter( + HtmlChangeReporter hcr = new HtmlChangeReporter( renderer, listener, testContext); - hcr.setPolicy(Sanitizers.FORMATTING.apply(hcr.getWrappedRenderer())); + hcr.setPolicy(Sanitizers.FORMATTING.apply( + hcr.getWrappedRenderer(), Context.DEFAULT)); String html = ",World!" + "

"; diff --git a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java index 9e0a866a..6f647475 100644 --- a/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java +++ b/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java @@ -32,12 +32,16 @@ import java.util.Locale; import org.junit.Test; +import org.owasp.url.Absolutizer; +import org.owasp.url.BuiltinScheme; +import org.owasp.url.UrlClassifiers; +import org.owasp.url.UrlContext; import com.google.common.base.Joiner; import junit.framework.TestCase; -@SuppressWarnings("javadoc") +@SuppressWarnings({"javadoc", "deprecation"}) public class HtmlPolicyBuilderTest extends TestCase { static final String EXAMPLE = Joiner.on('\n').join( @@ -176,7 +180,7 @@ public static final void testLinksWithNofollow() { } @Test - public static final void testImagesAllowed() { + public static final void testImagesAllowedDeprecated() { assertEquals( Joiner.on('\n').join( "Header", @@ -194,6 +198,40 @@ public static final void testImagesAllowed() { .allowUrlProtocols("https"))); } + @Test + public static final void testImagesAllowed() { + // If we want relative URLs to pass a classifier that + // only allows HTTPS, we need to use a context with a + // base URL that uses HTTPS. + UrlContext urlContext = new UrlContext( + new Absolutizer( + UrlContext.DEFAULT.absolutizer.schemes, + "https://foo.com/bar")); + Context context = new Context(urlContext); + + assertEquals( + Joiner.on('\n').join( + "Header", + "Paragraph 1", + "Click me out", + "<img src=\"canary.png\" alt=\"local-canary\" />", + // HTTP img not output because only HTTPS allowed. + "Fancy with soupy tags.", + "Stylish Para 1", + "Stylish Para 2", + ""), + apply(new HtmlPolicyBuilder() + .allowElements("img") + .allowAttributes("alt").onElements("img") + .allowAttributes("src") + .matching( + UrlClassifiers.builder().scheme(BuiltinScheme.HTTPS) + .build()) + .onElements("img"), + EXAMPLE, + context)); + } + @Test public static final void testStyleFiltering() { assertEquals( @@ -256,6 +294,7 @@ public String apply(String elementName, List<String> attrs) { .allowElements("div"), "<body>foo</body>")); } + @Test public static final void testAllowUrlProtocols() { assertEquals( @@ -275,6 +314,30 @@ public static final void testAllowUrlProtocols() { .allowUrlProtocols("http"))); } + @Test + public static final void testMatchUrlClassifier() { + assertEquals( + Joiner.on('\n').join( + "Header", + "Paragraph 1", + "Click me out", + "<img src=\"canary.png\" alt=\"local-canary\" />" + + "<img src=\"http://canaries.org/canary.png\" />", + "Fancy with soupy tags.", + "Stylish Para 1", + "Stylish Para 2", + ""), + apply(new HtmlPolicyBuilder() + .allowElements("img") + .allowAttributes("alt").onElements("img") + .allowAttributes("src") + .matching( + UrlClassifiers.builder() + .scheme(BuiltinScheme.HTTP) + .build()) + .onElements("img"))); + } + @Test public static final void testPossibleFalloutFromIssue5() { assertEquals( @@ -285,9 +348,20 @@ public static final void testPossibleFalloutFromIssue5() { .allowAttributes("href").onElements("a") .allowUrlProtocols("http"), + "<a href='javascript:alert(1337)//:http'>Bad</a>")); + assertEquals( + "Bad", + apply( + new HtmlPolicyBuilder() + .allowElements("a") + .allowAttributes("href") + .matching( + UrlClassifiers.builder().scheme(BuiltinScheme.HTTP).build()) + .onElements("a"), "<a href='javascript:alert(1337)//:http'>Bad</a>")); } + @Test public static final void testTextInOption() { assertEquals( @@ -767,11 +841,12 @@ private static String apply(HtmlPolicyBuilder b) { return apply(b, EXAMPLE); } - private static String apply(HtmlPolicyBuilder b, String src) { + private static String apply(HtmlPolicyBuilder b, String src, Context context) { return b.toFactory().sanitize( - src, null, - new Handler<String>() { - public void handle(String x) { fail(x); } - }); + src, context, null, null); + } + + private static String apply(HtmlPolicyBuilder b, String src) { + return apply(b, src, Context.DEFAULT); } } diff --git a/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/src/test/java/org/owasp/html/HtmlSanitizerTest.java index d9165a63..8605c8a7 100644 --- a/src/test/java/org/owasp/html/HtmlSanitizerTest.java +++ b/src/test/java/org/owasp/html/HtmlSanitizerTest.java @@ -449,7 +449,7 @@ public String apply( .allowStyling() // Don't throw out useless <img> and <input> elements to ease debugging. .allowWithoutAttributes("img", "input") - .build(renderer); + .build(renderer, Context.DEFAULT); HtmlSanitizer.sanitize(html, policy); diff --git a/src/test/java/org/owasp/html/SanitizersTest.java b/src/test/java/org/owasp/html/SanitizersTest.java index a2712a35..dbf3a255 100644 --- a/src/test/java/org/owasp/html/SanitizersTest.java +++ b/src/test/java/org/owasp/html/SanitizersTest.java @@ -29,6 +29,11 @@ package org.owasp.html; import org.junit.Test; +import org.owasp.url.Absolutizer; +import org.owasp.url.BuiltinScheme; +import org.owasp.url.MediaTypeClassifiers; +import org.owasp.url.UrlClassifiers; +import org.owasp.url.UrlContext; import java.util.BitSet; import java.util.Iterator; @@ -40,7 +45,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -@SuppressWarnings("javadoc") +@SuppressWarnings({"javadoc", "deprecation"}) public class SanitizersTest extends TestCase { @Test @@ -222,6 +227,60 @@ public static final void testExplicitlyAllowedProtocolsAreCaseInsensitive() { assertEquals(want, s.sanitize(input)); } + @Test + public static final void testDataUrlMediaTypeRestrictions() { + UrlContext context = new UrlContext( + new Absolutizer( + UrlContext.DEFAULT.absolutizer.schemes, + "https://foo.com/bar/")); + PolicyFactory s = new HtmlPolicyBuilder() + .allowElements("a", "img", "p") + .allowAttributes("href") + .matching( + UrlClassifiers.builder() + .scheme( + BuiltinScheme.HTTPS, BuiltinScheme.MAILTO, BuiltinScheme.TEL) + .schemeData( + MediaTypeClassifiers.builder() + .type("text", "html") + .build()) + .build()) + .onElements("a") + .allowAttributes("src") + .matching( + UrlClassifiers.builder() + .scheme(BuiltinScheme.HTTPS) + .schemeData( + MediaTypeClassifiers.builder() + .type("image", "gif") + .type("image", "png") + .build()) + .build()) + .onElements("img") + .toFactory(); + + String input = ( + "<a href=\"mailto:foo@bar.com\">email</a>" + + "\n| <a href=\"foo.html\">foo</a>" + + "\n| <a href=\"data:text/html,bar\">bar</a>" + + "\n| <a href=\"data:image/png;base64,...\">" + + "<img src=\"data:image/png;base64,...\">" + + "</a>" + + "\n| <a href=\"foo.png\"><img src=\"foo.png\"></a>" + ); + String want = ( + "<a href=\"mailto:foo&#64;bar.com\">email</a>" + + "\n| <a href=\"foo.html\">foo</a>" + + "\n| <a href=\"data:text/html,bar\">bar</a>" + // We did not explicitly allow links to data:image. + + "\n| <img src=\"data:image/png;base64,...\" />" + // Media types on data have no effect on https l + + "\n| <a href=\"foo.png\"><img src=\"foo.png\" /></a>" + ); + + assertEquals(want, s.sanitize(input, new Context(context))); + } + @Test public static final void testIssue9StylesInTables() { String input = "" diff --git a/src/test/java/org/owasp/html/StylingPolicyTest.java b/src/test/java/org/owasp/html/StylingPolicyTest.java index 89d68f92..9731b73c 100644 --- a/src/test/java/org/owasp/html/StylingPolicyTest.java +++ b/src/test/java/org/owasp/html/StylingPolicyTest.java @@ -32,8 +32,6 @@ import org.junit.Test; -import com.google.common.base.Function; - import junit.framework.TestCase; @SuppressWarnings("javadoc") @@ -348,16 +346,18 @@ private static void assertSanitizedCss( @Nullable String expectedCss, String css) { StylingPolicy stylingPolicy = new StylingPolicy( CssSchema.DEFAULT, - new Function<String, String>() { - public String apply(String url) { + new UrlRewriter() { + public String rewrite(String url, Context context) { String safeUrl = - StandardUrlAttributePolicy.INSTANCE.apply("img", "src", url); + StandardUrlAttributePolicy.INSTANCE.apply( + "img", "src", url, context); if (safeUrl != null) { return safeUrl + "#sanitized"; } return null; } }); - assertEquals(expectedCss, stylingPolicy.sanitizeCssProperties(css)); + assertEquals(expectedCss, stylingPolicy.sanitizeCssProperties( + css, Context.DEFAULT)); } }