Skip to content
This repository has been archived by the owner on Jul 15, 2019. It is now read-only.

Commit

Permalink
updated comments and README
Browse files Browse the repository at this point in the history
- also improved the arrayLastIndexOf to use the native
Array.prototype.lastIndexOf if it exists
  • Loading branch information
adon committed Oct 5, 2015
1 parent 4c0436d commit 3701cb7
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 31 deletions.
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,36 @@ var input = '...';
var result = purifier.purify(input);
```

## Advanced Usage

The following outlines the default configuration, which is secure by default. You should perform due diligence to confirm your use cases are safe before disabling or altering any of these settings.

```js
// The default configuration
new Purifier({
whitelistTags: ['a', '...'],
whitelistAttributes: ['href', '...'],
enableCanonicalization: true,
tagBalance: {
enabled: true,
stackSize: 100
}
});
```

<!--
#### whitelistTags
#### whitelistAttributes
#### enableCanonicalization
-->

#### tagBalance
The untrusted data must be self-contained. Hence, it cannot close any tags that were opened before the point of its inclusion, nor leave any of its own tags unclosed. An efficient and simple tag balancing algorithm is applied by default to enforce only this goal, so it may not produce perfectly nested output. You may apply another tag balancing algorithm before invoking `purify`. However, the default one should remain enabled unless you're sure the self-contained requirement is met.

The ``stackSize`` (default: 100) is a limit imposed on the maximum number of unclosed tags (i.e., the maximum depth of nested tags). When untrusted data attempts to open more nested tags than the allowed limit, the algorithm ceases any further processing and simply closes all of the tags that are still open.

## Development

### How to build
Expand Down
66 changes: 35 additions & 31 deletions src/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ See the accompanying LICENSE file for terms.
tagBalance = that.tagBalance = {};
tagBalance.stackOverflow = false;
if ((tagBalance.enabled = config.tagBalance.enabled !== false)) {
tagBalance.stackSize = parseInt(config.tagBalance.stackSize) || 100;
tagBalance.stackPtrMax = (parseInt(config.tagBalance.stackSize) || 100) - 1;
tagBalance.stackPtr = 0;
tagBalance.stack = new Array(tagBalance.stackSize);
tagBalance.stack = new Array(tagBalance.stackPtrMax + 1);
}

// accept array of tags to be whitelisted, default list in tag-attr-list.js
Expand All @@ -54,11 +54,18 @@ See the accompanying LICENSE file for terms.

}

// TODO: introduce polyfill for Array.lastIndexOf
// A simple polyfill for Array.lastIndexOf
function arrayLastIndexOf(arr, element, fromIndex) {
for (var i = fromIndex === undefined ? arr.length - 1 : fromIndex; i >= 0; i--) {
if (arr[i] === element) {
return i;
if (arguments.length < 3) {
fromIndex = arr.length - 1;
}

if (Array.prototype.lastIndexOf) {
return arr.lastIndexOf(element, fromIndex);
}
for (; fromIndex >= 0; fromIndex--) {
if (arr[fromIndex] === element) {
return fromIndex;
}
}
return -1;
Expand All @@ -85,7 +92,12 @@ See the accompanying LICENSE file for terms.

if (idx) {
if (tagBalance.enabled && !optionalElements[tagName]) {
// relaxed tag balancing, accept it as long as the tag exists in the stack

// Simple tag balancing: close the tag as long as it
// exists in the stack, as we only want to ensure the
// untrusted data must be self-contained. Hence, it can
// not close any tags prior to its inclusion, nor leave
// any of its own tags unclosed.
idx = arrayLastIndexOf(tagBalance.stack, tagName, tagBalance.stackPtr - 1);

if (idx >= 0) {
Expand All @@ -94,7 +106,7 @@ See the accompanying LICENSE file for terms.
tagBalance.stackPtr--;
}

// // add closing tags for any opened ones before closing the current one
// Pop-until-matched tag balancing: add closing tags for any opened ones before closing the matched one
// while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
// this.output += '</' + openedTag + '>';
// }
Expand All @@ -109,20 +121,18 @@ See the accompanying LICENSE file for terms.
}
else {
// void elements only have a start tag; end tags must not be specified for void elements.
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
hasSelfClosing = voidElements[tagName];

// push the tagName into the openedTags stack if not found:
// - a self-closing tag or a void element
// this.config.tagBalance.enabled && !this.hasSelfClosing && this.openedTags.push(tagName);
if (tagBalance.enabled && !hasSelfClosing && !optionalElements[tagName]) {
if (tagBalance.stackPtr < tagBalance.stackSize) {
tagBalance.stack[tagBalance.stackPtr++] = tagName;
} else {
// cease processing anything if it exceeds the maximum stack size allowed
// cease further processing if it exceeds the maximum stack size allowed
if (tagBalance.stackPtr > tagBalance.stackPtrMax) {
tagBalance.stackOverflow = true;
break;
return;
}

tagBalance.stack[tagBalance.stackPtr++] = tagName;
}

if (prevState === 35 ||
Expand Down Expand Up @@ -172,7 +182,7 @@ See the accompanying LICENSE file for terms.

//case derivedState.TransitionName.TAG_OPEN_TO_MARKUP_OPEN:
// this.output += "<" + parser.input[i];
// break;
// break;

case derivedState.TransitionName.TO_SELF_CLOSING_START:
// boolean attributes may not have a value
Expand All @@ -196,30 +206,24 @@ See the accompanying LICENSE file for terms.
}

Purifier.prototype.purify = function (data) {
var that = this, i;
var that = this, i,
tagBalance = that.tagBalance;

that.attrVals = {};
that.output = '';

if (that.tagBalance.enabled) {
that.tagBalance.stack = new Array(this.tagBalance.stackSize);
that.tagBalance.stackPtr = 0;
if (tagBalance.enabled) {
tagBalance.stack = new Array(tagBalance.stackPtrMax + 1);
tagBalance.stackPtr = 0;
}

that.parser.reset().contextualize(data);

if (that.tagBalance.enabled) {

// close any remaining openedTags
for (i = that.tagBalance.stackPtr - 1; i >= 0; i--) {
that.output += '</' + that.tagBalance.stack[i] + '>';
if (tagBalance.enabled) {
// close remaining opened tags, if any
for (i = tagBalance.stackPtr - 1; i >= 0; i--) {
that.output += '</' + tagBalance.stack[i] + '>';
}
// if ((that.tagBalance.stack.length = that.tagBalance.stackPtr)) {
// that.output += '</' + that.tagBalance.stack.join('></') + '>';
// }
// while((openedTag = this.openedTags.pop())) {
// that.output += '</' + openedTag + '>';
// }
}

return that.output;
Expand Down

0 comments on commit 3701cb7

Please sign in to comment.