Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing line breaks and tabulators to innerText #1

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
.nyc_output
coverage/
node_modules/
.idea
.idea
linkedom-*.tgz
8 changes: 8 additions & 0 deletions .zed/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Folder-specific settings
//
// For a full list of overridable settings, and general information on folder-specific settings,
// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
{
"tab_size": 2,
"format_on_save": "off"
}
42 changes: 40 additions & 2 deletions cjs/interface/element.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
const {
ATTRIBUTE_NODE,
BLOCK_ELEMENTS,
TABLE_ELEMENTS,
CDATA_SECTION_NODE,
COMMENT_NODE,
ELEMENT_NODE,
Expand Down Expand Up @@ -157,22 +158,59 @@ class Element extends ParentNode {
// </specialGetters>


// <contentRelated>
get innerText() {
/**
 * Builds the string returned by `innerText` by walking the node chain from
 * `this[NEXT]` up to (but not including) `this[END]`.
 *
 * @param {Function} [customGetter] Optional hook invoked with every visited
 *   node. When it returns a string, that string is appended verbatim and the
 *   default handling of that node is skipped; any other return value falls
 *   through to the default handling below.
 * @returns {string} All collected chunks joined without a separator.
 */
__getInnerText(customGetter) {
const text = [];
let {[NEXT]: next, [END]: end} = this;



while (next !== end) {
if (typeof customGetter === "function") {
const custom = customGetter(next);
if (typeof custom === "string") {
// Only this single node is replaced; the walk resumes at next[NEXT]
text.push(custom)
next = next[NEXT];
continue;
}
}

// Add tabulators between table columns: both this node and its previous
// element sibling must have a tag listed in TABLE_ELEMENTS
if (
TABLE_ELEMENTS.has(next.tagName) &&
TABLE_ELEMENTS.has(next.previousElementSibling?.tagName)) {
text.push('\t');
}

if (next.nodeType === TEXT_NODE) {
// Collapse every run of whitespace in the text node to a single space
text.push(next.textContent.replace(/\s+/g, ' '));
} else if(
// Non-text node with a block-level tag: start a new line, unless nothing
// has been collected yet, this is the last node before `end`, or the
// previous chunk already is a line break
text.length && next[NEXT] != end &&
BLOCK_ELEMENTS.has(next.tagName)
&& text.at(-1) !== '\n'
) {
text.push('\n');
}

// Also break the line after a node whose parent element is block-level and
// whose neighbouring siblings are each either missing or block-level
// (skipped when the previous chunk already is a line break)
if (
text.at(-1) !== '\n' &&
(!next.nextSibling || BLOCK_ELEMENTS.has(next.nextSibling?.tagName)) &&
(!next.previousSibling || BLOCK_ELEMENTS.has(next.previousSibling?.tagName)) &&
BLOCK_ELEMENTS.has(next.parentElement?.tagName)
) {
text.push('\n');
}

next = next[NEXT];

}
return text.join('');

}

// <contentRelated>
/**
 * Text of this element as produced by `__getInnerText()` called without a
 * custom getter.
 */
get innerText() {
return this.__getInnerText()
}

/**
Expand Down
5 changes: 4 additions & 1 deletion cjs/shared/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ const DOCUMENT_FRAGMENT_NODE = 11;
exports.DOCUMENT_FRAGMENT_NODE = DOCUMENT_FRAGMENT_NODE;

// Elements
const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P']);
const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P', 'TR', 'PRE', 'HR', 'ADDRESS']);
exports.BLOCK_ELEMENTS = BLOCK_ELEMENTS;

// Table cell tags; Element's inner text extraction emits a tab between two
// adjacent element siblings whose tags are both in this set
const TABLE_ELEMENTS = new Set(['TH', 'TD']);
exports.TABLE_ELEMENTS = TABLE_ELEMENTS;

// TreeWalker
const SHOW_ALL = -1;
exports.SHOW_ALL = SHOW_ALL;
Expand Down
42 changes: 40 additions & 2 deletions esm/interface/element.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import {
ATTRIBUTE_NODE,
BLOCK_ELEMENTS,
TABLE_ELEMENTS,
CDATA_SECTION_NODE,
COMMENT_NODE,
ELEMENT_NODE,
Expand Down Expand Up @@ -159,22 +160,59 @@ export class Element extends ParentNode {
// </specialGetters>


// <contentRelated>
get innerText() {
/**
 * Builds the string returned by `innerText` by walking the node chain from
 * `this[NEXT]` up to (but not including) `this[END]`.
 *
 * @param {Function} [customGetter] Optional hook invoked with every visited
 *   node. When it returns a string, that string is appended verbatim and the
 *   default handling of that node is skipped; any other return value falls
 *   through to the default handling below.
 * @returns {string} All collected chunks joined without a separator.
 */
__getInnerText(customGetter) {
const text = [];
let {[NEXT]: next, [END]: end} = this;



while (next !== end) {
if (typeof customGetter === "function") {
const custom = customGetter(next);
if (typeof custom === "string") {
// Only this single node is replaced; the walk resumes at next[NEXT]
text.push(custom)
next = next[NEXT];
continue;
}
}

// Add tabulators between table columns: both this node and its previous
// element sibling must have a tag listed in TABLE_ELEMENTS
if (
TABLE_ELEMENTS.has(next.tagName) &&
TABLE_ELEMENTS.has(next.previousElementSibling?.tagName)) {
text.push('\t');
}

if (next.nodeType === TEXT_NODE) {
// Collapse every run of whitespace in the text node to a single space
text.push(next.textContent.replace(/\s+/g, ' '));
} else if(
// Non-text node with a block-level tag: start a new line, unless nothing
// has been collected yet, this is the last node before `end`, or the
// previous chunk already is a line break
text.length && next[NEXT] != end &&
BLOCK_ELEMENTS.has(next.tagName)
&& text.at(-1) !== '\n'
) {
text.push('\n');
}

// Also break the line after a node whose parent element is block-level and
// whose neighbouring siblings are each either missing or block-level
// (skipped when the previous chunk already is a line break)
if (
text.at(-1) !== '\n' &&
(!next.nextSibling || BLOCK_ELEMENTS.has(next.nextSibling?.tagName)) &&
(!next.previousSibling || BLOCK_ELEMENTS.has(next.previousSibling?.tagName)) &&
BLOCK_ELEMENTS.has(next.parentElement?.tagName)
) {
text.push('\n');
}

next = next[NEXT];

}
return text.join('');

}

// <contentRelated>
/**
 * Text of this element as produced by `__getInnerText()` called without a
 * custom getter.
 */
get innerText() {
return this.__getInnerText()
}

/**
Expand Down
4 changes: 3 additions & 1 deletion esm/shared/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ export const DOCUMENT_TYPE_NODE = 10;
export const DOCUMENT_FRAGMENT_NODE = 11;

// Elements
export const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P']);
export const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P', 'TR', 'PRE', 'HR', 'ADDRESS']);

// Table cell tags; Element's inner text extraction emits a tab between two
// adjacent element siblings whose tags are both in this set
export const TABLE_ELEMENTS = new Set(['TH', 'TD']);

// TreeWalker
export const SHOW_ALL = -1;
Expand Down
53 changes: 52 additions & 1 deletion test/html/element.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,5 +173,56 @@ node.innerHTML = '<video src="" controls>';
assert(node.innerHTML, '<video src="" controls></video>');

node.innerHTML = '<div>The <strong>quick</strong> brown fox</div><div>Jumped over<br>The lazy\ndog</div>';
assert(node.innerText, 'The quick brown fox\nJumped over\nThe lazy dog', 'innerText newlines');
assert(v(node.innerText), v('\nThe quick brown fox\nJumped over\nThe lazy dog\n'), 'innerText newlines');
assert(node.textContent, 'The quick brown foxJumped overThe lazy\ndog', 'textContent no newlines');

/**
 * Renders whitespace as visible glyphs so that mismatching innerText
 * expectations are easy to read in assertion output.
 *
 * @param {string} str Text to convert.
 * @returns {string} `str` with each space shown as a middle dot, each tab as
 *   a right arrow and each newline as a down-left arrow.
 */
function v(str) {
  // The table is kept inside the function: `v` is hoisted and is already
  // called by statements that appear earlier in this file.
  const glyphs = {
    ' ': '·',
    '\t': '→',
    '\n': '↵',
  };
  return str.replace(/[ \t\n]/g, (whitespace) => glyphs[whitespace]);
}

// Table: cells within a row are expected to be separated by tabs and rows by
// line breaks, with a leading line break and no trailing one
node.innerHTML = '<table><tr><th>Header1</th><th>Header2</th></tr><tr><td>Row1,Col1</td><td>Row1,Col2</td></tr><tr><td>Row2,Col1</td><td>Row2,Col2</td></tr></table>';
assert(v(node.innerText), v('\nHeader1\tHeader2\nRow1,Col1\tRow1,Col2\nRow2,Col1\tRow2,Col2'));

// A block element between two text nodes: each piece ends up on its own line
node.innerHTML = '<div>prefix<div>div</div>suffix</div>';
assert(v(node.innerText), v('\nprefix\ndiv\nsuffix\n'));

// Two nesting levels with the innermost block closing together with its parent
node.innerHTML = '<div>prefix<div>div1<div>div2</div></div>suffix</div>';
assert(v(node.innerText), v('\nprefix\ndiv1\ndiv2\nsuffix\n'));

// Nested block surrounded by text on both sides inside the middle block
node.innerHTML = '<div>prefix<div>foo<div>nested</div>bar</div>suffix</div>';
assert(v(node.innerText), v('\nprefix\nfoo\nnested\nbar\nsuffix\n'));

// Manual check of the customGetter hook of __getInnerText: text nodes inside
// an element matching ".crap" are replaced by an empty string, all other
// nodes use the default handling.
// NOTE(review): the result is only printed with console.log, not asserted.
node.innerHTML = `

<div>
content
<div class="crap">
crap
<span>
morecrap
</span>

</div>
<span>
more content
</span>

</div>

`
console.log(node.__getInnerText(textNode =>{
// Only text nodes (nodeType 3) are candidates for replacement; returning
// undefined leaves the node to the default handling
if (textNode.nodeType !== 3) {
return;
}

const el = textNode.parentElement;

// Suppress text whose parent element is, or is nested inside, ".crap"
if (el.closest(".crap")){
return ""
}


}))
1 change: 1 addition & 0 deletions types/esm/interface/element.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export class Element extends ParentNode implements globalThis.Element {
get tabIndex(): number;
set slot(value: any);
get slot(): any;
__getInnerText(customGetter: any): string;
get innerText(): string;
set textContent(text: string);
/**
Expand Down
1 change: 1 addition & 0 deletions types/esm/interface/image.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ export function ImageClass(ownerDocument: any): {
readonly style: any;
tabIndex: number;
slot: any;
__getInnerText(customGetter: any): string;
readonly innerText: string;
textContent: string;
innerHTML: string;
Expand Down
1 change: 1 addition & 0 deletions types/esm/shared/constants.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export const DOCUMENT_NODE: 9;
export const DOCUMENT_TYPE_NODE: 10;
export const DOCUMENT_FRAGMENT_NODE: 11;
export const BLOCK_ELEMENTS: Set<string>;
export const TABLE_ELEMENTS: Set<string>;
export const SHOW_ALL: -1;
export const SHOW_ELEMENT: 1;
export const SHOW_TEXT: 4;
Expand Down
45 changes: 42 additions & 3 deletions worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -3839,7 +3839,9 @@ const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;

// Elements
const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P']);
const BLOCK_ELEMENTS = new Set(['ARTICLE', 'ASIDE', 'BLOCKQUOTE', 'BODY', 'BR', 'BUTTON', 'CANVAS', 'CAPTION', 'COL', 'COLGROUP', 'DD', 'DIV', 'DL', 'DT', 'EMBED', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'LI', 'UL', 'OL', 'P', 'TR', 'PRE', 'HR', 'ADDRESS']);

// Table cell tags; Element's inner text extraction emits a tab between two
// adjacent element siblings whose tags are both in this set
const TABLE_ELEMENTS = new Set(['TH', 'TD']);

// TreeWalker
const SHOW_ALL = -1;
Expand Down Expand Up @@ -7739,22 +7741,59 @@ let Element$1 = class Element extends ParentNode {
// </specialGetters>


// <contentRelated>
get innerText() {
/**
 * Builds the string returned by `innerText` by walking the node chain from
 * `this[NEXT]` up to (but not including) `this[END]`.
 *
 * @param {Function} [customGetter] Optional hook invoked with every visited
 *   node. When it returns a string, that string is appended verbatim and the
 *   default handling of that node is skipped; any other return value falls
 *   through to the default handling below.
 * @returns {string} All collected chunks joined without a separator.
 */
__getInnerText(customGetter) {
const text = [];
let {[NEXT]: next, [END]: end} = this;



while (next !== end) {
if (typeof customGetter === "function") {
const custom = customGetter(next);
if (typeof custom === "string") {
// Only this single node is replaced; the walk resumes at next[NEXT]
text.push(custom);
next = next[NEXT];
continue;
}
}

// Add tabulators between table columns: both this node and its previous
// element sibling must have a tag listed in TABLE_ELEMENTS
if (
TABLE_ELEMENTS.has(next.tagName) &&
TABLE_ELEMENTS.has(next.previousElementSibling?.tagName)) {
text.push('\t');
}

if (next.nodeType === TEXT_NODE) {
// Collapse every run of whitespace in the text node to a single space
text.push(next.textContent.replace(/\s+/g, ' '));
} else if(
// Non-text node with a block-level tag: start a new line, unless nothing
// has been collected yet, this is the last node before `end`, or the
// previous chunk already is a line break
text.length && next[NEXT] != end &&
BLOCK_ELEMENTS.has(next.tagName)
&& text.at(-1) !== '\n'
) {
text.push('\n');
}

// Also break the line after a node whose parent element is block-level and
// whose neighbouring siblings are each either missing or block-level
// (skipped when the previous chunk already is a line break)
if (
text.at(-1) !== '\n' &&
(!next.nextSibling || BLOCK_ELEMENTS.has(next.nextSibling?.tagName)) &&
(!next.previousSibling || BLOCK_ELEMENTS.has(next.previousSibling?.tagName)) &&
BLOCK_ELEMENTS.has(next.parentElement?.tagName)
) {
text.push('\n');
}

next = next[NEXT];

}
return text.join('');

}

// <contentRelated>
/**
 * Text of this element as produced by `__getInnerText()` called without a
 * custom getter.
 */
get innerText() {
return this.__getInnerText()
}

/**
Expand Down