Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add DOM Parser #2

Open
wants to merge 3 commits into
base: task/typedefs
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 15 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@
"main": "dist/main/js/main.js",
"types": "dist/main/js/main.d.ts",
"unpkg": "dist/imsc.min.js",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
},
"./*.js": {
"types": "./dist/*.d.ts",
"import": "./dist/*.js"
},
"./*": {
"types": "./dist/*.d.ts",
"import": "./dist/*.js"
}
},
Comment on lines +31 to +44
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added granular file imports for anyone installing imscJs via npm. This way, fromXML can be imported directly, and sax is not bundled unless createSAXParser is explicitly imported.

"scripts": {
"prepublishOnly": "grunt build:release",
"dev": "npx http-server build/public_html",
Expand All @@ -37,7 +51,7 @@
"test": "node --test ./src/test/js/*Test.js"
},
"dependencies": {
"sax": "1.2.1"
"sax": "^1.4.1"
},
"devDependencies": {
"@eslint/js": "^9.1.1",
Expand Down
20 changes: 9 additions & 11 deletions src/main/js/doc.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/

import sax from "sax";
import { reportError, reportFatal, reportWarning } from "./error.js";
import { ns_ebutts, ns_ittp, ns_itts, ns_tt, ns_ttp, ns_tts } from "./names.js";
import { createDOMParser } from "./parser.js";
import { byName, byQName } from "./styles.js";
import { ComputedLength, hasOwnProperty, parseLength } from "./utils.js";

Expand All @@ -36,10 +36,8 @@ import { ComputedLength, hasOwnProperty, parseLength } from "./utils.js";

/**
* @typedef {import("./error").ErrorHandler} ErrorHandler
*/

/**
* @typedef {sax.Tag | sax.QualifiedTag} Node
* @typedef {import("./parser").Node} Node
* @typedef {import("./parser").Parser} Parser
*/

/**
Expand Down Expand Up @@ -80,18 +78,18 @@ import { ComputedLength, hasOwnProperty, parseLength } from "./utils.js";
* @param {string} xmlstring XML document
* @param {ErrorHandler} errorHandler Error callback
* @param {?MetadataHandler} metadataHandler Callback for <Metadata> elements
* @param {?Parser} parser XML parser
* @returns {?TT} Opaque in-memory representation of an IMSC1 document
*/

export function fromXML(xmlstring, errorHandler, metadataHandler) {
const p = sax.parser(true, { xmlns: true });
export function fromXML(xmlstring, errorHandler, metadataHandler, parser = createDOMParser()) {
Comment on lines +81 to +85
Copy link
Owner Author

@littlespex littlespex Jun 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allow the parser to be passed in as long as it adheres to the interface. This also changes the default to the DOMParser approach.

const estack = [];
const xmllangstack = [];
const xmlspacestack = [];
let metadata_depth = 0;
let doc = null;

p.onclosetag = function () {
parser.onclosetag = function () {

if (estack[0] instanceof Region) {

Expand Down Expand Up @@ -191,7 +189,7 @@ export function fromXML(xmlstring, errorHandler, metadataHandler) {
estack.shift();
};

p.ontext = function (str) {
parser.ontext = function (str) {

if (estack[0] === undefined) {

Expand Down Expand Up @@ -234,7 +232,7 @@ export function fromXML(xmlstring, errorHandler, metadataHandler) {

};

p.onopentag = function (node) {
parser.onopentag = function (node) {

// maintain the xml:space stack

Expand Down Expand Up @@ -604,7 +602,7 @@ export function fromXML(xmlstring, errorHandler, metadataHandler) {

// parse the document

p.write(xmlstring).close();
parser.write(xmlstring).close();

// all referential styling has been flatten, so delete styles

Expand Down
1 change: 1 addition & 0 deletions src/main/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
export { fromXML } from "./doc.js";
export { renderHTML } from "./html.js";
export { generateISD } from "./isd.js";
export { createDOMParser, createSAXParser } from "./parser.js";
98 changes: 98 additions & 0 deletions src/main/js/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import sax from "sax";

/**
* @typedef {sax.Tag | sax.QualifiedTag} Node
*/

/**
* @typedef {Object} Parser
* @property {(xml: string) => Parser} write
* @property {() => Parser} close
* @property {(node: Node) => void} onopentag
* @property {(text: string) => void} ontext
* @property {() => void} onclosetag
*/

export class XMLParser {
Comment on lines +7 to +16
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid a massive refactor of fromXML(), the parser interface matches the sax interface, and XMLParser simply crawls the result of DOMParser and mimics the sax behavior.

/**
* @param {Element} element
* @returns {SAX}
*/
static toNode(element) {
const attrs = element.attributes;
const node = XMLParser.toNS(element);
node.attributes = {};

for (let i = 0, len = attrs.length; i < len; i++) {
const attr = attrs[i];
node.attributes[attr.name] = XMLParser.toNS(attr);
}

return node;
}

static toNS(node) {
return {
name: node.nodeName,
prefix: node.prefix,
local: node.localName,
uri: node.namespaceURI,
value: node.value,
};
}

/**
 * Handler invoked with the sax-style node when an element is opened.
 * No-op by default; consumers assign their own handler.
 */
onopentag = () => { };

/**
 * Handler invoked with the text content of each text node.
 * No-op by default; consumers assign their own handler.
 */
ontext = () => { };

/**
 * Handler invoked after all children of an element have been processed.
 * No-op by default; consumers assign their own handler.
 */
onclosetag = () => { };

write(xmlstring) {
const parser = new DOMParser();
const doc = parser.parseFromString(xmlstring, "application/xml");
const errorNode = doc.querySelector("parsererror");

if (errorNode) {
throw new Error("XML parsing error: " + errorNode.textContent);
}

this.process(doc.firstChild);

return this;
}

process(element) {
const node = XMLParser.toNode(element);
this.onopentag(node);

const children = element.childNodes;

for (let i = 0, len = children.length; i < len; i++) {
const child = children[i];

if (child.nodeType === Node.TEXT_NODE) {
this.ontext(child.textContent);
} else if (child.nodeType === Node.ELEMENT_NODE) {
this.process(child);
}
}

this.onclosetag();
}

/**
 * Does no work in this implementation; present so that
 * `write(...).close()` can be chained.
 *
 * @returns {XMLParser} this parser
 */
close() {
return this;
}
}

/**
 * Creates a parser backed by XMLParser, which uses the platform DOMParser.
 *
 * @returns {Parser} a new XMLParser instance
 */
export function createDOMParser() {
    const parser = new XMLParser();
    return parser;
}

/**
 * Creates a parser backed by the sax library, in strict mode with
 * namespace support enabled.
 *
 * @returns {Parser} a new sax parser instance
 */
export function createSAXParser() {
    const strict = true;
    const options = { xmlns: true };
    return sax.parser(strict, options);
}
3 changes: 2 additions & 1 deletion src/test/js/utils/getIMSC1Document.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fs from "node:fs/promises";
import { fromXML } from "../../../main/js/doc.js";
import { createSAXParser } from "../../../main/js/parser.js";

const errorHandler = {
info: function (msg) {
Expand All @@ -18,5 +19,5 @@ const errorHandler = {

/**
 * Reads the file at the given location as UTF-8 and parses it with
 * fromXML using the SAX-backed parser.
 *
 * @param {string | URL} url Location of the document, passed to fs.readFile
 * @param {?Object} metadataHandler Passed through to fromXML unchanged
 * @returns {Promise<*>} Resolves with the value returned by fromXML
 */
export async function getIMSC1Document(url, metadataHandler) {
    const contents = await fs.readFile(url, "utf8");
    // Pass the SAX parser explicitly; an earlier return without it would
    // make this call unreachable.
    return fromXML(contents, errorHandler, metadataHandler, createSAXParser());
}
12 changes: 8 additions & 4 deletions src/test/webapp/gen-renders.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
<div id="main">

<section id="start">
<button id="start-rendering" onclick="generateRenders('imsc-tests/imsc1')">Generate IMSC 1 reference
renders</button>
<button id="start-rendering" onclick="generateRenders('imsc-tests/imsc1_1')">Generate IMSC 1.1 reference
renders</button>
<button onclick="generateRenders('imsc-tests/imsc1', true)">Generate IMSC 1 reference
renders - SAX</button>
<button onclick="generateRenders('imsc-tests/imsc1_1', true)">Generate IMSC 1.1 reference
renders - SAX</button>
<button onclick="generateRenders('imsc-tests/imsc1', false)">Generate IMSC 1 reference
renders - DOM</button>
<button onclick="generateRenders('imsc-tests/imsc1_1', false)">Generate IMSC 1.1 reference
renders - DOM</button>
Comment on lines +23 to +30
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added buttons for both parsers and made sure the generated files match.

</section>

<section id="visual">
Expand Down
9 changes: 5 additions & 4 deletions src/test/webapp/js/gen-renders.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ var errorHandler = {

/* */

function generateRenders(reffiles_root) {
function generateRenders(reffiles_root, use_sax) {

var zip = new JSZip();

Expand All @@ -61,7 +61,7 @@ function generateRenders(reffiles_root) {

for (var i in finfos) {

p.push(asyncProcessRefFile(reffiles_root, renders_dir, pngs_dir, finfos[i]));
p.push(asyncProcessRefFile(reffiles_root, renders_dir, pngs_dir, finfos[i], use_sax));

}

Expand All @@ -85,7 +85,7 @@ function generateRenders(reffiles_root) {

}

function asyncProcessRefFile(reffiles_root, renders_dir, pngs_dir, finfo) {
function asyncProcessRefFile(reffiles_root, renders_dir, pngs_dir, finfo, use_sax) {

var test_name = finfo.name || getTestName(finfo.path, finfo.params || {});

Expand All @@ -94,7 +94,8 @@ function asyncProcessRefFile(reffiles_root, renders_dir, pngs_dir, finfo) {

return asyncLoadFile(getReferenceFilePath(reffiles_root, finfo.path))
.then(function (contents) {
var doc = imsc.fromXML(contents.replace(/\r\n/g, '\n'), errorHandler);
var parser = use_sax ? imsc.createSAXParser() : imsc.createDOMParser();
// fromXML(xmlstring, errorHandler, metadataHandler, parser): the parser is
// the fourth argument, so pass null for the unused metadata handler.
var doc = imsc.fromXML(contents.replace(/\r\n/g, '\n'), errorHandler, null, parser);

test_renders_dir.file("doc.json",
JSON.stringify(
Expand Down