From e7c1c8d55c3b565feb61d30ca9056c7e445be0e2 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 20 Jan 2025 09:53:03 +0700 Subject: [PATCH 1/9] fix(web): display string for reversions accepted after BKSP --- .../src/main/model-compositor.ts | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index d0b4658b13..7f0e9687bd 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -16,8 +16,8 @@ import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; export class ModelCompositor { - private lexicalModel: LexicalModel; - private contextTracker?: correction.ContextTracker; + readonly lexicalModel: LexicalModel; + readonly contextTracker?: correction.ContextTracker; static readonly MAX_SUGGESTIONS = 12; readonly punctuation: LexicalModelPunctuation; @@ -208,9 +208,9 @@ export class ModelCompositor { acceptSuggestion(suggestion: Suggestion, context: Context, postTransform?: Transform): Reversion { // Step 1: generate and save the reversion's Transform. 
- let sourceTransform = suggestion.transform; - let deletedLeftChars = context.left.kmwSubstr(-sourceTransform.deleteLeft, sourceTransform.deleteLeft); - let insertedLength = sourceTransform.insert.kmwLength(); + const sourceTransform = suggestion.transform; + const deletedLeftChars = context.left.kmwSubstr(-sourceTransform.deleteLeft, sourceTransform.deleteLeft); + const insertedLength = sourceTransform.insert.kmwLength(); let reversionTransform: Transform = { insert: deletedLeftChars, @@ -233,14 +233,25 @@ export class ModelCompositor { let postContextTokenization = this.tokenize(postContext); if(postContextTokenization) { // Handles display string for reversions triggered by accepting a suggestion mid-token. - const preCaretToken = postContextTokenization.left[postContextTokenization.left.length - 1]; - revertedPrefix = (preCaretToken && !preCaretToken.isWhitespace) ? preCaretToken.text : ''; + // Also handles display string for reversions applied after previous applications - + // namely, after the whitespace that follows them. + let preCaretToken: models.Token; + const leftContext = postContextTokenization.left; + for(let i = leftContext.length - 1; i >= 0; i--) { + // Applying one suggestion on top of another just-accepted suggestion should result in + // reversions that display the originally-replaced context... not the empty-string + // that follows any added whitespace from either of the accepted suggestions. + if(leftContext[i].text && !leftContext[i].isWhitespace) { + preCaretToken = leftContext[i]; + } + } + revertedPrefix = preCaretToken.text ?? ''; revertedPrefix += postContextTokenization.caretSplitsToken ? 
postContextTokenization.right[0].text : ''; } else { revertedPrefix = this.wordbreak(postContext); } - let firstConversion = models.transformToSuggestion(reversionTransform); + const firstConversion = models.transformToSuggestion(reversionTransform); firstConversion.displayAs = revertedPrefix; // Build the actual Reversion, which is technically an annotated Suggestion. From 9fd68d38478176a1e4cddb1a4054c49c82a9ba42 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 17 Jan 2025 12:55:44 +0700 Subject: [PATCH 2/9] feat(web): mark accepted suggestions in tracked context --- .../src/main/correction/context-tracker.ts | 8 +++ .../src/main/model-compositor.ts | 62 +++++++++++++++++-- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts index 3f92808c99..751024ed34 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts @@ -13,6 +13,12 @@ import Suggestion = LexicalModelTypes.Suggestion; import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; +/** + * Indicates that the owning token's 'replacement' represents part of an + * actually-applied suggestion. 
+ */ +export const APPLIED_SUGGESTION_COMPONENT = -2; + function textToCharTransforms(text: string, transformId?: number) { let perCharTransforms: Transform[] = []; @@ -56,6 +62,7 @@ export class TrackedContextToken { transformDistributions: Distribution[] = []; replacements: TrackedContextSuggestion[] = []; activeReplacementId: number = -1; + replacementTransformId: number = -1; constructor(); constructor(instance: TrackedContextToken); @@ -86,6 +93,7 @@ export class TrackedContextToken { clearReplacements() { this.activeReplacementId = -1; this.replacements = [] + this.replacementTransformId = -1; } /** diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index 7f0e9687bd..d3ba3bf395 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -1,5 +1,6 @@ import * as models from '@keymanapp/models-templates'; import * as correction from './correction/index.js' +import APPLIED_SUGGESTION_COMPONENT = correction.APPLIED_SUGGESTION_COMPONENT; import TransformUtils from './transformUtils.js'; import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; @@ -14,6 +15,7 @@ import Reversion = LexicalModelTypes.Reversion; import Suggestion = LexicalModelTypes.Suggestion; import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; +import { tokenizeTransform } from './correction/transform-tokenization.js'; export class ModelCompositor { readonly lexicalModel: LexicalModel; @@ -277,15 +279,67 @@ export class ModelCompositor { if(!contextState) { contextState = this.contextTracker.analyzeState(this.lexicalModel, context).state; } - - contextState.tail.activeReplacementId = suggestion.id; - let 
acceptedContext = models.applyTransform(suggestion.transform, context); - this.contextTracker.analyzeState(this.lexicalModel, acceptedContext); + this.markAcceptedSuggestion(suggestion, context, contextState); } return reversion; } + markAcceptedSuggestion(suggestion: Suggestion, context: Context, contextState: correction.TrackedContextState) { + // Suggestion IDs at this level are unique. + contextState.tail.activeReplacementId = suggestion.id; + contextState.tail.replacementTransformId = suggestion.transformId; + contextState.taggedContext = context; + + // Need to mark the 'accepted' aspect of this in some way... + // We know which state it should match; all there is to do is actually do the bookkeeping. + const matchResults = this.contextTracker.analyzeState( + this.lexicalModel, + context, + [{ + sample: suggestion.transform, + p: 1.0 + }], + true + ); + + // If context-tracking handles the applied suggestion properly... + if(matchResults?.baseState) { + // Get the index of the first token altered by the suggestion being applied. + let substitutionTokenIndex = (contextState.tokens.length - 1) - matchResults.headTokensRemoved; + + const tokenizer = determineModelTokenizer(this.lexicalModel) + let tokenizedApplication = tokenizeTransform(tokenizer, context, suggestion.transform); + + // We build our suggestions to do whole-word replacement. Fortunately, that means we already have + // the full suggestions! + const suggestions = contextState.tail.replacements; + if(suggestions && (substitutionTokenIndex + tokenizedApplication.length == matchResults.state.tokens.length)) { + + for(let j = 1; j <= tokenizedApplication.length; j++) { + const replacementPortion: correction.TrackedContextSuggestion = { + suggestion: { + ...suggestion, + id: APPLIED_SUGGESTION_COMPONENT, // Actual suggestions always present non-negative IDs; this can uniquely mark. + transformId: suggestion.transformId, + // this as a component of an applied suggestion. 
+ transform: tokenizedApplication.slice(0, j).reduce((accum, current) => models.buildMergedTransform(accum, current), { insert: '', deleteLeft: 0}), + }, + tokenWidth: j + } + + const token = matchResults.state.tokens[substitutionTokenIndex + j - 1]; + // Attach our fragmented version of the suggestion - the part useful for rewinding it - + // as its own suggestion with a distinct, unique ID indicative of this property. + token.replacements = [ replacementPortion ].concat(suggestions); + token.activeReplacementId = APPLIED_SUGGESTION_COMPONENT; // perhaps give unique ID + overwrite the original suggestion ID. + token.replacementTransformId = suggestion.transformId; + } + } + // else: we're not confident we can map the replacement details safely to do reversion later + } + } + async applyReversion(reversion: Reversion, context: Context): Promise { // If we are unable to track context (because the model does not support LexiconTraversal), // we need a "fallback" strategy. From a5c64cd3d5a45ecb5a89ff67e1537b0448ae7205 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 11:00:06 +0700 Subject: [PATCH 3/9] change(web): allow forced preservation for matched context states --- .../src/main/correction/context-tracker.ts | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts index 751024ed34..71b4c913b6 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts @@ -355,8 +355,17 @@ export class ContextTracker extends CircularArray { static attemptMatchContext( tokenizedContext: Token[], matchState: TrackedContextState, - transformSequenceDistribution?: Distribution + transformSequenceDistribution?: Distribution, + preserveMatchState?: 
boolean ): ContextMatchResult { + // By default, we just edit the prior state's tokens and keep 'em. + // - If it's an old token, it shouldn't be altered; why use extra memory? + // - If we're editing the current token via typing, we only wish to consider the new + // version of the matched context. + // + // This changes when applying suggestions, as THEN we wish to remember 'before' vs 'after'. + preserveMatchState ??= false; + // Map the previous tokenized state to an edit-distance friendly version. let matchContext: USVString[] = matchState.toRawTokenization(); @@ -393,6 +402,9 @@ export class ContextTracker extends CircularArray { // If mutations HAVE happened, we have work to do. let state = matchState; + if(preserveMatchState) { + state = new TrackedContextState(state); + } let priorEdit: typeof editPath[0]; let poppedTokenCount = 0; @@ -494,7 +506,11 @@ export class ContextTracker extends CircularArray { } const sourceToken = matchState.tokens[i]; - state.tokens[i - poppedTokenCount] = sourceToken; + if(preserveMatchState) { + state.tokens[i - poppedTokenCount] = new TrackedContextToken(sourceToken); + } else { + state.tokens[i - poppedTokenCount] = sourceToken; + } const token = state.tokens[i - poppedTokenCount]; // TODO: I'm beginning to believe that searchSpace should (eventually) be tracked @@ -746,7 +762,7 @@ export class ContextTracker extends CircularArray { continue; } - let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution); + let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution, preserveMatchState); if(result?.state) { // Keep it reasonably current! 
And it's probably fine to have it more than once From cc3513774db490f66b815a56881c5a3440716b35 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 13:45:37 +0700 Subject: [PATCH 4/9] change(web): adds helpers for applied-suggestion edge detection, suggestion from previous context --- .../worker-thread/src/main/predict-helpers.ts | 93 ++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 70f739611b..c16eb4a474 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -2,7 +2,7 @@ import * as models from '@keymanapp/models-templates'; import TransformUtils from './transformUtils.js'; import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; -import { ContextTracker, TrackedContextState } from './correction/context-tracker.js'; +import { APPLIED_SUGGESTION_COMPONENT, ContextTracker, TrackedContextState } from './correction/context-tracker.js'; import { ExecutionTimer } from './correction/execution-timer.js'; import ModelCompositor from './model-compositor.js'; import { LexicalModelTypes } from '@keymanapp/common-types'; @@ -16,6 +16,8 @@ import Reversion = LexicalModelTypes.Reversion; import Suggestion = LexicalModelTypes.Suggestion; import SuggestionTag = LexicalModelTypes.SuggestionTag; import Transform = LexicalModelTypes.Transform; +import { deepCopy } from '@keymanapp/web-utils'; +import { LexicalModelPunctuation } from '../../../../../../../common/web/types/src/lexical-model-types.js'; /* * The functions in this file exist to provide unit-testable stateless components for the @@ -83,6 +85,23 @@ export function tupleDisplayOrderSort(a: CorrectionPredictionTuple, b: Correctio return b.totalProb - 
a.totalProb; } +export function isContextAtAcceptedSuggestionEdge( + postContextState: TrackedContextState +): boolean { + if(!postContextState || postContextState.tail.replacements?.length == 0) { + return false; + } + + // Original suggestion set: acquired. But... they're based on a different context... + const appliedSuggestion = postContextState.tail.replacement; + + if(appliedSuggestion?.suggestion?.id != APPLIED_SUGGESTION_COMPONENT) { + return false; + } + + return true; +} + /** * This method performs the correction-search and model-lookup operations for * prediction generation by using the user's context state and potential @@ -191,6 +210,14 @@ export async function correctAndEnumerate( ); const postContextState = contextChangeAnalysis.state; + if(isContextAtAcceptedSuggestionEdge(postContextState)) { + return { + postContextState: postContextState, + // We're going to ignore whatever we'd generate, so just skip past this phase. + rawPredictions: [] + }; + } + // TODO: Should we filter backspaces & whitespaces out of the transform distribution? // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. @@ -349,6 +376,70 @@ export async function correctAndEnumerate( }; } +/** + * Given a previous context state to which a suggestion was applied, regenerate its + * suggestions based on the current state of the context, altering the 'keep' suggestion + * into a 'revert' suggestion that will restore the original input. + * @param postContextState + * @param inputTransform + * @param punctuation + * @returns + */ +export function suggestFromPriorContext( + postContextState: TrackedContextState, + inputTransform: Transform, + punctuation: LexicalModelPunctuation +) { + const currentToken = postContextState.tail; + + // Original suggestion set: acquired. But... they're based on a different context... 
+ const appliedSuggestion = currentToken.replacement; + + // We added a partial, synthetic 'replacement' useful for tracking how much to revert. + // It should have a set, distinct ID corresponding to the check below. This partial + // version was never a real suggestion; even if it were, the original is still within our list! + const suggestions = currentToken.replacements.filter((entry) => entry != appliedSuggestion).map((entry) => deepCopy(entry)); + + // First up: we can safely use the raw insert string as the needed length to erase. + // Also, we need to include whatever backspacing occurred to reach that point, as the suggestion + // is applied to the context BEFORE the backspace takes effect. + const deleteLeft = appliedSuggestion.suggestion.transform.insert.kmwLength() + inputTransform.deleteLeft; + suggestions.forEach((entry) => { + entry.suggestion.transform.deleteLeft = deleteLeft; + if(inputTransform.id) { + entry.suggestion.transformId = inputTransform.id; + } + }); + + // Note: the input's transform ID is mapped onto each suggestion in the loop above. + + const keepSuggestion = suggestions.find((entry) => entry.suggestion.tag == 'keep'); + // Convert the original 'keep' suggestion into a REVERT suggestion; this will restore the + // original context. + keepSuggestion.suggestion.tag = 'revert'; + if(currentToken.replacementTransformId) { + keepSuggestion.suggestion.transformId = -currentToken.replacementTransformId; + } + + // Remove any keep-added punctuation; we wish to place the caret immediately after the token, + // with no intervening whitespace or similar punctuation insertion. + const postSuggestionInsert = punctuation.insertAfterWord; + if(postSuggestionInsert) { + const keepTransform = keepSuggestion.suggestion.transform; + const keepText = keepTransform.insert; + + // Validate that the punctuation matches first. 
+ const punct = keepText.slice(keepText.length - postSuggestionInsert.length); + if(punct == postSuggestionInsert) { + // remove the whitespace-ish punctuation if it was added! + keepTransform.insert = keepText.slice(0, keepText.length - postSuggestionInsert.length); + } + } + + // Bypass the rest of the correction-search; just use these! + return suggestions.map((entry) => entry.suggestion); +} + export function shouldStopSearchingEarly( bestCorrectionCost: number, currentCorrectionCost: number, From 4e85ebe87d7eb4a038a20a8f68362ee39a8aceef Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 17 Jan 2025 13:05:03 +0700 Subject: [PATCH 5/9] feat(web): integrate new predictive-text helpers --- .../worker-thread/src/main/model-compositor.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index d3ba3bf395..a033e14742 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -3,7 +3,7 @@ import * as correction from './correction/index.js' import APPLIED_SUGGESTION_COMPONENT = correction.APPLIED_SUGGESTION_COMPONENT; import TransformUtils from './transformUtils.js'; -import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; +import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, isContextAtAcceptedSuggestionEdge, predictionAutoSelect, processSimilarity, suggestFromPriorContext, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; import { LexicalModelTypes } from 
'@keymanapp/common-types'; import CasingForm = LexicalModelTypes.CasingForm; @@ -132,6 +132,11 @@ export class ModelCompositor { const { postContextState, rawPredictions } = await correctAndEnumerate(this.contextTracker, this.lexicalModel, timer, transformDistribution, context); + // Check: did we just reach the tail of a prior token via BKSP? + if(isContextAtAcceptedSuggestionEdge(postContextState)) { + return suggestFromPriorContext(postContextState, inputTransform, this.punctuation); + } + if(this.activeTimer == timer) { this.activeTimer = null; } From bc78e893d5a3263b416acc079fd12a3d88aa1153 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 12:13:04 +0700 Subject: [PATCH 6/9] change(web): suggestion-banner logic for handling delayed reversions --- .../src/prediction/predictionContext.ts | 15 +++++++++++++-- .../main/src/headless/transcriptionCache.ts | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/web/src/engine/interfaces/src/prediction/predictionContext.ts b/web/src/engine/interfaces/src/prediction/predictionContext.ts index ec632a5058..0abcd2452b 100644 --- a/web/src/engine/interfaces/src/prediction/predictionContext.ts +++ b/web/src/engine/interfaces/src/prediction/predictionContext.ts @@ -319,9 +319,14 @@ export default class PredictionContext extends EventEmitter