From 0fa927a5b663ba276c393f500eccd9015164236f Mon Sep 17 00:00:00 2001 From: inikulin Date: Fri, 15 Jan 2016 17:15:37 +0300 Subject: [PATCH] Always pipe SAXParser to the /dev/null stream to prevent hitting ReadableStream buffer limits if we don't have consumers (fixes #97) --- docs/07_version_history.md | 3 +++ lib/sax/dev_null_stream.js | 14 ++++++++++++++ lib/sax/index.js | 6 ++++++ package.json | 2 +- test/fixtures/sax_test.js | 11 +++++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 lib/sax/dev_null_stream.js diff --git a/docs/07_version_history.md b/docs/07_version_history.md index 89debc83c..1018699c6 100644 --- a/docs/07_version_history.md +++ b/docs/07_version_history.md @@ -1,5 +1,8 @@ # Version history +## 2.1.2 + * Fixed: SAX parser silently exits on big files (GH [#97](https://github.com/inikulin/parse5/issues/97)) + ## 2.1.1 * Fixed: location info not attached for empty attributes (GH [#96](https://github.com/inikulin/parse5/issues/96)) (by [@yyx990803](https://github.com/yyx990803)). 
diff --git a/lib/sax/dev_null_stream.js b/lib/sax/dev_null_stream.js new file mode 100644 index 000000000..96bdcf331 --- /dev/null +++ b/lib/sax/dev_null_stream.js @@ -0,0 +1,14 @@ +'use strict'; + +var WritableStream = require('stream').Writable, + util = require('util'); + +var DevNullStream = module.exports = function () { + WritableStream.call(this); +}; + +util.inherits(DevNullStream, WritableStream); + +DevNullStream.prototype._write = function (chunk, encoding, cb) { + cb(); +}; diff --git a/lib/sax/index.js b/lib/sax/index.js index 4ffe40352..03c6a5d4a 100644 --- a/lib/sax/index.js +++ b/lib/sax/index.js @@ -1,6 +1,7 @@ 'use strict'; var TransformStream = require('stream').Transform, + DevNullStream = require('./dev_null_stream'), inherits = require('util').inherits, Tokenizer = require('../tokenizer'), ParserFeedbackSimulator = require('./parser_feedback_simulator'), @@ -59,6 +60,11 @@ var SAXParser = module.exports = function (options) { this.lastChunkWritten = false; this.stopped = false; + + // NOTE: always pipe stream to the /dev/null stream to avoid + // hitting `highWaterMark` even if we don't have consumers. 
+ // (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774) + this.pipe(new DevNullStream()); }; inherits(SAXParser, TransformStream); diff --git a/package.json b/package.json index 91a901fda..b0e4c93e2 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "parse5", "description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node.js", - "version": "2.1.1", + "version": "2.1.2", "author": "Ivan Nikulin (https://github.com/inikulin)", "contributors": [ "Alan Clarke (https://github.com/alanclarke)", diff --git a/test/fixtures/sax_test.js b/test/fixtures/sax_test.js index a4123ca11..4bfb6a234 100644 --- a/test/fixtures/sax_test.js +++ b/test/fixtures/sax_test.js @@ -128,3 +128,14 @@ exports['SAX - Piping and .stop()'] = function (done) { done(); }); }; + +exports['Regression-SAX-SAX parser silently exits on big files (GH-97)'] = function (done) { + var parser = new SAXParser(); + + fs + .createReadStream(path.join(__dirname, '../data/huge-page/huge-page.html')) + .pipe(parser); + + // NOTE: This is a smoke test - in case of regression it will fail with a timeout. + parser.once('finish', done); +};