Skip to content

Commit

Permalink
Whole word scanning (#1330)
Browse files Browse the repository at this point in the history
* --wip-- [skip ci]

* --wip-- [skip ci]

* Add scanResolution setting

* Avoid popup flickering

* Fix tests

* Remove guard clause

* Fix test

* Use wordBoundary instead of whitespace

* Copy changes

* Treat element nodes as word boundaries

* Revert getNextNode change

* Rename variable for clarity

* Account for single quotes
  • Loading branch information
jamesmaa authored Aug 24, 2024
1 parent ebce402 commit e7bab2c
Show file tree
Hide file tree
Showing 13 changed files with 147 additions and 16 deletions.
11 changes: 10 additions & 1 deletion ext/data/schemas/options-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,8 @@
"hidePopupOnCursorExit",
"hidePopupOnCursorExitDelay",
"normalizeCssZoom",
"scanWithoutMousemove"
"scanWithoutMousemove",
"scanResolution"
],
"properties": {
"inputs": {
Expand Down Expand Up @@ -764,6 +765,14 @@
"scanWithoutMousemove": {
"type": "boolean",
"default": true
},
"scanResolution": {
"type": "string",
"enum": [
"character",
"word"
],
"default": "character"
}
}
},
Expand Down
1 change: 1 addition & 0 deletions ext/js/app/frontend.js
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ export class Frontend {
sentenceParsingOptions,
scanAltText: scanningOptions.scanAltText,
scanWithoutMousemove: scanningOptions.scanWithoutMousemove,
scanResolution: scanningOptions.scanResolution,
});
this._updateTextScannerEnabled();

Expand Down
11 changes: 11 additions & 0 deletions ext/js/data/options-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ export class OptionsUtil {
this._updateVersion48,
this._updateVersion49,
this._updateVersion50,
this._updateVersion51,
];
/* eslint-enable @typescript-eslint/unbound-method */
if (typeof targetVersion === 'number' && targetVersion < result.length) {
Expand Down Expand Up @@ -1476,6 +1477,16 @@ export class OptionsUtil {
}
}

/**
* - Add scanning.scanResolution
* @type {import('options-util').UpdateFunction}
*/
async _updateVersion51(options) {
for (const profile of options.profiles) {
profile.options.scanning.scanResolution = 'character';
}
}

/**
* @param {string} url
* @returns {Promise<chrome.tabs.Tab>}
Expand Down
1 change: 1 addition & 0 deletions ext/js/display/display.js
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ export class Display extends EventDispatcher {
sentenceParsingOptions,
scanAltText: scanningOptions.scanAltText,
scanWithoutMousemove: scanningOptions.scanWithoutMousemove,
scanResolution: scanningOptions.scanResolution,
},
});

Expand Down
71 changes: 63 additions & 8 deletions ext/js/dom/dom-text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ import {readCodePointsBackward, readCodePointsForward} from '../data/string-util
* A class used to scan text in a document.
*/
export class DOMTextScanner {
/**
* A regular expression used to match word delimiters.
* It is a negated character class: it matches any single character that is
* NOT covered by one of the following escapes:
* \w matches ASCII letters, ASCII digits, and underscore
* \p{L} matches any kind of letter from any language
* \p{N} matches any kind of numeric character in any script
* The `u` flag is required for the \p{...} Unicode property escapes.
* @type {RegExp}
*/
static WORD_DELIMITER_REGEX = /[^\w\p{L}\p{N}]/u;

/**
* Creates a new instance of a DOMTextScanner.
* @param {Node} node The DOM Node to start at.
Expand All @@ -30,8 +38,9 @@ export class DOMTextScanner {
* @param {boolean} forcePreserveWhitespace Whether or not whitespace should be forced to be preserved,
* regardless of CSS styling.
* @param {boolean} generateLayoutContent Whether or not newlines should be added based on CSS styling.
* @param {boolean} stopAtWordBoundary Whether to stop scanning when a word boundary is encountered while scanning backwards.
*/
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true) {
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true, stopAtWordBoundary = false) {
const ruby = DOMTextScanner.getParentRubyElement(node);
const resetOffset = (ruby !== null);
if (resetOffset) { node = ruby; }
Expand All @@ -54,10 +63,17 @@ export class DOMTextScanner {
this._lineHasWhitespace = false;
/** @type {boolean} */
this._lineHasContent = false;
/** @type {boolean} */
/**
* @type {boolean} Whether or not whitespace should be forced to be preserved,
* regardless of CSS styling.
*/
this._forcePreserveWhitespace = forcePreserveWhitespace;
/** @type {boolean} */
this._generateLayoutContent = generateLayoutContent;
/**
* @type {boolean} Whether or not to stop scanning when word boundaries are encountered.
*/
this._stopAtWordBoundary = stopAtWordBoundary;
}

/**
Expand Down Expand Up @@ -130,6 +146,10 @@ export class DOMTextScanner {
break;
}
} else if (nodeType === ELEMENT_NODE) {
if (this._stopAtWordBoundary && !forward) {
// Element nodes are considered word boundaries when scanning backwards
break;
}
lastNode = node;
const initialNodeAtBeginningOfNodeGoingBackwards = node === this._initialNode && this._offset === 0 && !forward;
const initialNodeAtEndOfNodeGoingForwards = node === this._initialNode && this._offset === node.childNodes.length && forward;
Expand All @@ -145,7 +165,7 @@ export class DOMTextScanner {

/** @type {Node[]} */
const exitedNodes = [];
node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);
node = DOMTextScanner.getNextNodeToProcess(node, forward, enterable, exitedNodes);

for (const exitedNode of exitedNodes) {
if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
Expand Down Expand Up @@ -206,9 +226,19 @@ export class DOMTextScanner {
const nodeValueLength = nodeValue.length;
const {preserveNewlines, preserveWhitespace} = this._getWhitespaceSettings(textNode);
if (resetOffset) { this._offset = nodeValueLength; }

while (this._offset > 0) {
const char = readCodePointsBackward(nodeValue, this._offset - 1, 1);
if (this._stopAtWordBoundary && DOMTextScanner.isWordDelimiter(char)) {
if (DOMTextScanner.isSingleQuote(char) && this._offset > 1) {
// Check to see if char before single quote is a word character (e.g. "don't")
const prevChar = readCodePointsBackward(nodeValue, this._offset - 2, 1);
if (DOMTextScanner.isWordDelimiter(prevChar)) {
return false;
}
} else {
return false;
}
}
this._offset -= char.length;
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
if (this._checkCharacterBackward(char, charAttributes)) { break; }
Expand Down Expand Up @@ -244,7 +274,7 @@ export class DOMTextScanner {
/**
* @param {string} char
* @param {import('dom-text-scanner').CharacterAttributes} charAttributes
* @returns {boolean}
* @returns {boolean} Whether or not to stop scanning.
*/
_checkCharacterForward(char, charAttributes) {
switch (charAttributes) {
Expand Down Expand Up @@ -300,7 +330,7 @@ export class DOMTextScanner {
/**
* @param {string} char
* @param {import('dom-text-scanner').CharacterAttributes} charAttributes
* @returns {boolean}
* @returns {boolean} Whether or not to stop scanning.
*/
_checkCharacterBackward(char, charAttributes) {
switch (charAttributes) {
Expand Down Expand Up @@ -356,14 +386,14 @@ export class DOMTextScanner {
// Static helpers

/**
* Gets the next node in the document for a specified scanning direction.
* Gets the next node to process in the document for a specified scanning direction.
* @param {Node} node The current DOM Node.
* @param {boolean} forward Whether to scan forward in the document or backward.
* @param {boolean} visitChildren Whether the children of the current node should be visited.
* @param {Node[]} exitedNodes An array which stores nodes which were exited.
* @returns {?Node} The next node in the document, or `null` if there is no next node.
*/
static getNextNode(node, forward, visitChildren, exitedNodes) {
static getNextNodeToProcess(node, forward, visitChildren, exitedNodes) {
/** @type {?Node} */
let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
if (next === null) {
Expand Down Expand Up @@ -488,6 +518,31 @@ export class DOMTextScanner {
}
}

/**
* @param {string} character
* @returns {boolean}
*/
static isWordDelimiter(character) {
return DOMTextScanner.WORD_DELIMITER_REGEX.test(character);
}

/**
* @param {string} character
* @returns {boolean}
*/
static isSingleQuote(character) {
switch (character.charCodeAt(0)) {
case 0x27: // Single quote ('')
case 0x2019: // Right single quote (’)
case 0x2032: // Prime (′)
case 0x2035: // Reversed prime (‵)
case 0x02bc: // Modifier letter apostrophe (ʼ)
return true;
default:
return false;
}
}

/**
* Checks whether a given style is visible or not.
* This function does not check `style.display === 'none'`.
Expand Down
6 changes: 3 additions & 3 deletions ext/js/dom/text-source-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -531,19 +531,19 @@ export class TextSourceGenerator {
let previousStyles = null;
try {
let i = 0;
let startContinerPre = null;
let startContainerPre = null;
while (true) {
const range = this._caretRangeFromPoint(x, y);
if (range === null) {
return null;
}

const startContainer = range.startContainer;
if (startContinerPre !== startContainer) {
if (startContainerPre !== startContainer) {
if (this._isPointInRange(x, y, range, normalizeCssZoom, language)) {
return range;
}
startContinerPre = startContainer;
startContainerPre = startContainer;
}

if (previousStyles === null) { previousStyles = new Map(); }
Expand Down
7 changes: 4 additions & 3 deletions ext/js/dom/text-source-range.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,16 @@ export class TextSourceRange {


/**
* Moves the start offset of the text by a set amount of unicode codepoints.
* Moves the start offset of the text backwards by a set amount of unicode codepoints.
* @param {number} length The maximum number of codepoints to move by.
* @param {boolean} layoutAwareScan Whether or not HTML layout information should be used to generate
* the string content when scanning.
* @param {boolean} stopAtWordBoundary Whether to stop moving the start offset when a word boundary is encountered.
* @returns {number} The actual number of codepoints that were read.
*/
setStartOffset(length, layoutAwareScan) {
setStartOffset(length, layoutAwareScan, stopAtWordBoundary = false) {
if (this._disallowExpandSelection) { return 0; }
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan, stopAtWordBoundary).seek(-length);
this._range.setStart(state.node, state.offset);
this._rangeStartOffset = this._range.startOffset;
this._content = state.content + this._content;
Expand Down
9 changes: 9 additions & 0 deletions ext/js/language/text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ export class TextScanner extends EventDispatcher {
matchTypePrefix,
scanAltText,
scanWithoutMousemove,
scanResolution,
}) {
if (Array.isArray(inputs)) {
this._inputs = inputs.map((input) => this._convertInput(input));
Expand Down Expand Up @@ -299,6 +300,9 @@ export class TextScanner extends EventDispatcher {
if (typeof scanWithoutMousemove === 'boolean') {
this._scanWithoutMousemove = scanWithoutMousemove;
}
if (typeof scanResolution === 'string') {
this._scanResolution = scanResolution;
}
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
const {scanExtent, terminationCharacterMode, terminationCharacters} = sentenceParsingOptions;
if (typeof scanExtent === 'number') {
Expand Down Expand Up @@ -465,6 +469,11 @@ export class TextScanner extends EventDispatcher {
null
);

if (this._scanResolution === 'word') {
// Move the start offset to the beginning of the word
textSource.setStartOffset(this._scanLength, this._layoutAwareScan, true);
}

if (this._textSourceCurrent !== null && this._textSourceCurrent.hasSameStart(textSource)) {
return;
}
Expand Down
27 changes: 27 additions & 0 deletions ext/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,33 @@ <h1>Yomitan Settings</h1>
</p>
</div>
</div>
<div class="settings-item">
<div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Scan resolution</div>
<div class="settings-item-description">
Start the lookup scan at the word or character of the cursor position.
<a tabindex="0" class="more-toggle more-only" data-parent-distance="4">More&hellip;</a>
</div>
</div>
<div class="settings-item-right">
<select data-setting="scanning.scanResolution">
<option value="character">Character</option>
<option value="word">Word</option>
</select>
</div>
</div>
<div class="settings-item-children more" hidden>
<p>
The scan resolution determines where the scan starts when the cursor is moved.
The <code>Character</code> option will start scanning at the cursor's current position,
while the <code>Word</code> option will start scanning at the beginning of the word.
</p>
<p>
<a tabindex="0" class="more-toggle" data-parent-distance="3">Less&hellip;</a>
</p>
</div>
</div>
<div class="settings-item"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Scan using middle mouse button</div>
Expand Down
3 changes: 2 additions & 1 deletion test/options-util.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ function createProfileOptionsUpdatedTestData1() {
onSearchQuery: false,
},
scanWithoutMousemove: true,
scanResolution: 'character',
inputs: [
{
include: 'shift',
Expand Down Expand Up @@ -644,7 +645,7 @@ function createOptionsUpdatedTestData1() {
},
],
profileCurrent: 0,
version: 50,
version: 51,
global: {
database: {
prefixWildcardsSupported: false,
Expand Down
14 changes: 14 additions & 0 deletions types/ext/dom-text-scanner.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ export type CharacterAttributes = 0 | 1 | 2 | 3;
* - 2 newlines corresponds to a significant visual distinction since the previous content.
*/
export type ElementSeekInfo = {
/**
* Indicates whether the content of this node should be entered,
* i.e. whether the scanner should visit the node's children.
*/
enterable: boolean;
/**
* The number of newline characters that should be added.
* - 1 newline corresponds to a simple new line in the layout.
* - 2 newlines correspond to a significant visual distinction since the previous content.
*/
newlines: number;
};

Expand All @@ -43,6 +51,12 @@ export type ElementSeekInfo = {
* `preserveWhitespace` indicates whether or not sequences of whitespace characters are collapsed.
*/
export type WhitespaceSettings = {
/**
* Indicates whether or not newline characters are treated as line breaks.
*/
preserveNewlines: boolean;
/**
* Indicates whether or not sequences of whitespace characters are collapsed.
*/
preserveWhitespace: boolean;
};
1 change: 1 addition & 0 deletions types/ext/settings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ export type ScanningOptions = {
normalizeCssZoom: boolean;
scanAltText: boolean;
scanWithoutMousemove: boolean;
scanResolution: string;
};

export type ScanningInput = {
Expand Down
1 change: 1 addition & 0 deletions types/ext/text-scanner.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export type Options = {
sentenceParsingOptions?: SentenceParsingOptions;
scanAltText?: boolean;
scanWithoutMousemove?: boolean;
scanResolution?: string;
};

export type InputOptionsOuter = {
Expand Down

0 comments on commit e7bab2c

Please sign in to comment.