Skip to content
This repository has been archived by the owner on Jul 15, 2019. It is now read-only.

Commit

Permalink
updated after reviewer's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
adon committed Aug 18, 2015
1 parent 8243352 commit fa24fa2
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 28 deletions.
78 changes: 53 additions & 25 deletions src/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,25 @@ See the accompanying LICENSE file for terms.
voidElements = tagAttList.VoidElements,
optionalElements = tagAttList.OptionalElements;

/*jshint -W030 */
function Purifier(config) {
var that = this;
var that = this, tagBalance;

config = config || {};
// defaulted to true
config.enableCanonicalization = config.enableCanonicalization !== false;
config.enableVoidingIEConditionalComments = config.enableVoidingIEConditionalComments !== false;

// tag balancing is enabled by default
config.tagBalance || (config.tagBalance = {});
tagBalance = that.tagBalance = {};
tagBalance.stackOverflow = false;
if ((tagBalance.enabled = config.tagBalance.enabled !== false)) {
tagBalance.stackSize = parseInt(config.tagBalance.stackSize) || 100;
tagBalance.stackPtr = 0;
tagBalance.stack = new Array(tagBalance.stackSize);
}

config.enableTagBalancing = config.enableTagBalancing !== false;

// accept array of tags to be whitelisted, default list in tag-attr-list.js
that.tagsWhitelist = config.whitelistTags || tagAttList.WhiteListTags;
Expand All @@ -38,15 +48,16 @@ See the accompanying LICENSE file for terms.
enableCanonicalization: config.enableCanonicalization,
enableVoidingIEConditionalComments: config.enableVoidingIEConditionalComments
}).on('postWalk', function (lastState, state, i, endsWithEOF) {
processTransition.call(that, lastState, state, i);
!tagBalance.stackOverflow && processTransition.call(that, lastState, state, i);
});

that.cssParser = new CssParser({"ver": "strict", "throwError": false});

}

// TODO: introduce polyfill for Array.lastIndexOf
function arrayLastIndexOf(arr, element) {
for (var i = arr.length - 1; i >= 0; i--) {
function arrayLastIndexOf(arr, element, fromIndex) {
for (var i = fromIndex === undefined ? arr.length - 1 : fromIndex; i >= 0; i--) {
if (arr[i] === element) {
return i;
}
Expand All @@ -58,8 +69,8 @@ See the accompanying LICENSE file for terms.
/* jshint validthis: true */
/* jshint expr: true */
var parser = this.parser,
idx, tagName, attrValString, openedTag, key, value;

tagBalance = this.tagBalance,
idx = 0, tagName = '', attrValString = '', key = '', value = '', hasSelfClosing = 0;

switch (derivedState.Transitions[prevState][nextState]) {

Expand All @@ -74,13 +85,14 @@ See the accompanying LICENSE file for terms.
if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) {

if (idx) {
if (this.config.enableTagBalancing && !optionalElements[tagName]) {
if (tagBalance.enabled && !optionalElements[tagName]) {
// relaxed tag balancing, accept it as long as the tag exists in the stack
idx = arrayLastIndexOf(this.openedTags, tagName);
idx = arrayLastIndexOf(tagBalance.stack, tagName, tagBalance.stackPtr - 1);

if (idx >= 0) {
this.output += '</' + tagName + '>';
this.openedTags.splice(idx, 1);
tagBalance.stack.splice(idx, 1);
tagBalance.stackPtr--;
}

// // add closing tags for any opened ones before closing the current one
Expand All @@ -99,20 +111,27 @@ See the accompanying LICENSE file for terms.
else {
// void elements only have a start tag; end tags must not be specified for void elements.
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
this.hasSelfClosing = voidElements[tagName];
hasSelfClosing = voidElements[tagName];

// push the tagName onto the tag-balancing stack unless it is:
// - a self-closing tag or a void element
// this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
this.config.enableTagBalancing && !this.hasSelfClosing && !optionalElements[tagName] && this.openedTags.push(tagName);
// this.config.tagBalance.enabled && !this.hasSelfClosing && this.openedTags.push(tagName);
if (tagBalance.enabled && !hasSelfClosing && !optionalElements[tagName]) {
if (tagBalance.stackPtr < tagBalance.stackSize) {
tagBalance.stack[tagBalance.stackPtr++] = tagName;
} else {
// cease processing anything if it exceeds the maximum stack size allowed
tagBalance.stackOverflow = true;
break;
}
}

if (prevState === 35 ||
prevState === 36 ||
prevState === 40) {
this.attrVals[parser.getAttributeName()] = parser.getAttributeValue();
}

attrValString = '';
for (key in this.attrVals) {
if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) {
value = this.attrVals[key];
Expand All @@ -135,14 +154,13 @@ See the accompanying LICENSE file for terms.
}

// handle self-closing tags
this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>');
this.output += '<' + tagName + attrValString + (hasSelfClosing ? ' />' : '>');
// this.output += '<' + tagName + attrValString + '>';

}
}
// reinitialize once tag has been written to output
this.attrVals = {};
// this.hasSelfClosing = false;
break;

case derivedState.TransitionName.ATTR_TO_AFTER_ATTR:
Expand Down Expand Up @@ -179,20 +197,30 @@ See the accompanying LICENSE file for terms.
}

Purifier.prototype.purify = function (data) {
var that = this, openedTag;
var that = this, i;

that.output = '';
that.openedTags = [];
that.attrVals = {};
// that.hasSelfClosing = false;
that.parser.reset();
that.parser.contextualize(data);
that.output = '';

if (that.tagBalance.enabled) {
that.tagBalance.stack = new Array(this.tagBalance.stackSize);
that.tagBalance.stackPtr = 0;
}

that.parser.reset().contextualize(data);

if (that.tagBalance.enabled) {

if (that.config.enableTagBalancing) {
// close any remaining openedTags
while((openedTag = this.openedTags.pop())) {
that.output += '</' + openedTag + '>';
for (i = that.tagBalance.stackPtr - 1; i >= 0; i--) {
that.output += '</' + that.tagBalance.stack[i] + '>';
}
// if ((that.tagBalance.stack.length = that.tagBalance.stackPtr)) {
// that.output += '</' + that.tagBalance.stack.join('></') + '>';
// }
// while((openedTag = this.openedTags.pop())) {
// that.output += '</' + openedTag + '>';
// }
}

return that.output;
Expand Down
19 changes: 16 additions & 3 deletions tests/unit/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,31 @@ Authors: Aditya Mahendrakar <[email protected]>
assert.equal(output, '<h1 id="foo" title="asd" checked>hello world 2</h1>');
});

it('should always balance unopened tags', function(){
it('should balance tags', function(){
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";

// with tag balancing enabled by default
var output = (new Purifier({enableTagBalancing:true})).purify(html);
var output = (new Purifier({tagBalance:{enabled:true}})).purify(html);
assert.equal(output, 'foobar<a href="123">hello<b>world</a><embed />123<br /><br /><p></b>');
});

it('should balance remaining tags and drop inputs when there are too many unclosed tags', function(){
var html = "<b>1<b>2<b>3<b>4<b>5<b>6</b></b></b></b>";

// with tag balancing enabled and the stack size limited to 3
var output = (new Purifier({tagBalance:{enabled:true, stackSize:3}})).purify(html);
assert.equal(output, '<b>1<b>2<b>3</b></b></b>');
});

it('should not balance tags if disabled', function(){
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";

// with tag balancing disabled
var output = (new Purifier({enableTagBalancing:false})).purify(html);
var output = (new Purifier({tagBalance:{enabled:false}})).purify(html);
assert.equal(output, '</div>foo</h2>bar<a href="123">hello<b>world</a><embed />123</embed><br /><br /><p>');
});


it('should handle all vectors mentioned in https://html5sec.org', function(){
var output, i, vector;
for (var i = 0; i < html5secVectors.length; i++) {
Expand Down

0 comments on commit fa24fa2

Please sign in to comment.