diff --git a/README.md b/README.md index 2cefb57..b568d07 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,36 @@ var input = '...'; var result = purifier.purify(input); ``` +## Advanced Usage + +The following outlines the configuration that is secure by default. You should perform due dilligence to confirm your use cases are safe before disabling or altering the configurations. + +```js +// The default configuration +new Purifier({ + whitelistTags: ['a', '...'], + whitelistAttributes: ['href', '...'], + enableCanonicalization: true, + tagBalance: { + enabled: true, + stackSize: 100 + } +}); +``` + + + +#### tagBalance +The untrusted data must be self-contained. Hence, it cannot close any tags prior to its inclusion, nor leave any of its own tags unclosed. An efficient and simple tag balancing algorithm is applied by default to enforce this goal only, and may not produce perfectly nested output. You may implement another tag balancing algorithm before invoking purify. But the default one should still be enabled, unless you're sure the self-contained requirement is met. + +The ``stackSize`` (default: 100) is a limit imposed on the maximum number of unclosed tags (or the max levels of nested tags). When an untrusted data attempts to open tags that are so nested and has exceeded the allowed limit, the algorithm will cease any further processing but simply close all of those tags. + ## Development ### How to build diff --git a/src/html-purify.js b/src/html-purify.js index 21fe11d..f944221 100755 --- a/src/html-purify.js +++ b/src/html-purify.js @@ -11,16 +11,28 @@ See the accompanying LICENSE file for terms. derivedState = require('./derived-states.js'), xssFilters = require('xss-filters'), CssParser = require('css-js'), - hrefAttribtues = tagAttList.HrefAttributes, - voidElements = tagAttList.VoidElements; + hrefAttributes = tagAttList.HrefAttributes, + voidElements = tagAttList.VoidElements, + optionalElements = tagAttList.OptionalElements; + /*jshint -W030 */ function Purifier(config) { - var that = this; + var that = this, tagBalance; config = config || {}; // defaulted to true config.enableCanonicalization = config.enableCanonicalization !== false; - config.enableTagBalancing = config.enableTagBalancing !== false; + config.enableVoidingIEConditionalComments = config.enableVoidingIEConditionalComments !== false; + + // defaulted to true + config.tagBalance || (config.tagBalance = {}); + tagBalance = that.tagBalance = {}; + tagBalance.stackOverflow = false; + if ((tagBalance.enabled = config.tagBalance.enabled !== false)) { + tagBalance.stackPtrMax = (parseInt(config.tagBalance.stackSize) || 100) - 1; + tagBalance.stackPtr = 0; + tagBalance.stack = new Array(tagBalance.stackPtrMax + 1); + } // accept array of tags to be whitelisted, default list in tag-attr-list.js that.tagsWhitelist = config.whitelistTags || tagAttList.WhiteListTags; @@ -35,28 +47,36 @@ See the accompanying LICENSE file for terms. enableCanonicalization: config.enableCanonicalization, enableVoidingIEConditionalComments: false // comments are always stripped from the output anyway }).on('postWalk', function (lastState, state, i, endsWithEOF) { - processTransition.call(that, lastState, state, i); + !tagBalance.stackOverflow && processTransition.call(that, lastState, state, i); }); that.cssParser = new CssParser({"ver": "strict", "throwError": false}); + } - // TODO: introduce polyfill for Array.indexOf - function contains(arr, element) { - for (var i = 0, len = arr.length; i < len; i++) { - if (arr[i] === element) { - return true; + // A simple polyfill for Array.lastIndexOf + function arrayLastIndexOf(arr, element, fromIndex) { + if (arguments.length < 3) { + fromIndex = arr.length - 1; + } + + if (Array.prototype.lastIndexOf) { + return arr.lastIndexOf(element, fromIndex); + } + for (; fromIndex >= 0; fromIndex--) { + if (arr[fromIndex] === element) { + return fromIndex; } } - return false; + return -1; } function processTransition(prevState, nextState, i) { /* jshint validthis: true */ /* jshint expr: true */ var parser = this.parser, - idx, tagName, attrValString, openedTag, key, value; - + tagBalance = this.tagBalance, + idx = 0, tagName = '', attrValString = '', key = '', value = '', hasSelfClosing = 0; switch (derivedState.Transitions[prevState][nextState]) { @@ -68,30 +88,52 @@ See the accompanying LICENSE file for terms. idx = parser.getCurrentTagIndex(); tagName = parser.getCurrentTag(idx); - if (contains(this.tagsWhitelist, tagName)) { + if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) { if (idx) { - if (this.config.enableTagBalancing) { - // add closing tags for any opened ones before closing the current one - while((openedTag = this.openedTags.pop()) && openedTag !== tagName) { - this.output += ''; - } - // openedTag is undefined if tagName is never found in all openedTags, no output needed - if (openedTag) { - this.output += ''; + if (tagBalance.enabled && !optionalElements[tagName]) { + + // Simple tag balancing: close the tag as long as it + // exists in the stack, as we only want to ensure the + // untrusted data must be self-contained. Hence, it can + // not close any tags prior to its inclusion, nor leave + // any of its own tags unclosed. + idx = arrayLastIndexOf(tagBalance.stack, tagName, tagBalance.stackPtr - 1); + + if (idx >= 0) { + this.output += ''; + tagBalance.stack.splice(idx, 1); + tagBalance.stackPtr--; } + + // Pop-until-matched tag balancing: add closing tags for any opened ones before closing the matched one + // while((openedTag = this.openedTags.pop()) && openedTag !== tagName) { + // this.output += ''; + // } + // // openedTag is undefined if tagName is never found in all openedTags, no output needed + // if (openedTag) { + // this.output += ''; + // } } else { this.output += ''; } } else { - // - void elements only have a start tag; end tags must not be specified for void elements. - this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName]; + // void elements only have a start tag; end tags must not be specified for void elements. + hasSelfClosing = voidElements[tagName]; // push the tagName into the openedTags stack if not found: // - a self-closing tag or a void element - this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName); + if (tagBalance.enabled && !hasSelfClosing && !optionalElements[tagName]) { + // cease further processing if it exceeds the maximum stack size allowed + if (tagBalance.stackPtr > tagBalance.stackPtrMax) { + tagBalance.stackOverflow = true; + return; + } + + tagBalance.stack[tagBalance.stackPtr++] = tagName; + } if (prevState === 35 || prevState === 36 || @@ -99,9 +141,8 @@ See the accompanying LICENSE file for terms. this.attrVals[parser.getAttributeName()] = parser.getAttributeValue(); } - attrValString = ''; for (key in this.attrVals) { - if (contains(this.attributesWhitelist, key)) { + if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) { value = this.attrVals[key]; if (key === "style") { // TODO: move style to a const @@ -116,19 +157,19 @@ See the accompanying LICENSE file for terms. attrValString += ' ' + key; if (value !== null) { - attrValString += '="' + (hrefAttribtues[key] ? xssFilters.uriInDoubleQuotedAttr(decodeURI(value)) : value) + '"'; + attrValString += '="' + (hrefAttributes[key] ? xssFilters.uriInDoubleQuotedAttr(decodeURI(value)) : value) + '"'; } } } // handle self-closing tags - this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>'); + this.output += '<' + tagName + attrValString + (hasSelfClosing ? ' />' : '>'); + // this.output += '<' + tagName + attrValString + '>'; } } // reinitialize once tag has been written to output this.attrVals = {}; - this.hasSelfClosing = 0; break; case derivedState.TransitionName.ATTR_TO_AFTER_ATTR: @@ -141,32 +182,47 @@ See the accompanying LICENSE file for terms. //case derivedState.TransitionName.TAG_OPEN_TO_MARKUP_OPEN: // this.output += "<" + parser.input[i]; - // break; + // break; case derivedState.TransitionName.TO_SELF_CLOSING_START: // boolean attributes may not have a value if (prevState === 35) { this.attrVals[parser.getAttributeName()] = null; } - this.hasSelfClosing = 1; + + /* According to https://html.spec.whatwg.org/multipage/syntax.html#start-tags + * "Then, if the element is one of the void elements, or if the element is a foreign element, then there may be a single U+002F SOLIDUS character (/). + * This character has no effect on void elements, but on foreign elements it marks the start tag as self-closing." + */ + // that means only foreign elements can self-close (self-closing is optional for void elements) + // no foreign elements will be allowed, so the following logic can be commented + // openedTag = parser.getStartTagName(); + // if (openedTag === 'svg' || openedTag === 'math') { // ... + // this.hasSelfClosing = true; + // } + break; } } Purifier.prototype.purify = function (data) { - var that = this, openedTag; + var that = this, i, + tagBalance = that.tagBalance; - that.output = ''; - that.openedTags = []; that.attrVals = {}; - that.hasSelfClosing = 0; - that.parser.reset(); - that.parser.contextualize(data); - - if (that.config.enableTagBalancing) { - // close any remaining openedTags - while((openedTag = this.openedTags.pop())) { - that.output += ''; + that.output = ''; + + if (tagBalance.enabled) { + tagBalance.stack = new Array(tagBalance.stackPtrMax + 1); + tagBalance.stackPtr = 0; + } + + that.parser.reset().contextualize(data); + + if (tagBalance.enabled) { + // close remaining opened tags, if any + for (i = tagBalance.stackPtr - 1; i >= 0; i--) { + that.output += ''; } } diff --git a/src/tag-attr-list.js b/src/tag-attr-list.js index 04cacbb..eac58ec 100644 --- a/src/tag-attr-list.js +++ b/src/tag-attr-list.js @@ -261,3 +261,6 @@ exports.HrefAttributes = { // Void elements only have a start tag; end tags must not be specified for void elements. // https://html.spec.whatwg.org/multipage/syntax.html#void-elements exports.VoidElements = {"area":1, "base":1, "br":1, "col":1, "embed":1, "hr":1, "img":1, "input":1, "keygen":1, "link":1, "menuitem":1, "meta":1, "param":1, "source":1, "track":1, "wbr":1}; + +// https://html.spec.whatwg.org/multipage/syntax.html#optional-tags +exports.OptionalElements = {/*"plaintext":1, "html":1, "head":1, "body":1,*/ "li":1, "dt":1, "dd":1, "p":1, "rt":1, "rp":1, "optgroup":1, "option":1, "colgroup":1, "caption":1, "thead":1, "tbody":1, "tfoot":1, "tr":1, "td":1, "th":1}; diff --git a/tests/test-vectors.js b/tests/test-vectors.js index 7db885a..6d98c65 100644 --- a/tests/test-vectors.js +++ b/tests/test-vectors.js @@ -628,12 +628,12 @@ var generalVectors = [ { id: 10, input: "
normal tag made standalone with bogus trunc attr", - output: "
normal tag made standalone with bogus trunc attr" + output: "
normal tag made standalone with bogus trunc attr" }, { id: 11, input: "
normal tag made standalone with trunc attr bad val", - output: "
normal tag made standalone with trunc attr bad val" + output: "
normal tag made standalone with trunc attr bad val" }, { id: 12, @@ -648,7 +648,7 @@ var generalVectors = [ { id: 14, input: "
normal tag made standalone with value", - output: "
normal tag made standalone with value" + output: "
normal tag made standalone with value" }, //style attribute tests { @@ -779,7 +779,7 @@ var generalVectors = [ { id: 40, input: "