From e7c1c8d55c3b565feb61d30ca9056c7e445be0e2 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 20 Jan 2025 09:53:03 +0700 Subject: [PATCH 1/9] fix(web): display string for reversions accepted after BKSP --- .../src/main/model-compositor.ts | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index d0b4658b13..7f0e9687bd 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -16,8 +16,8 @@ import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; export class ModelCompositor { - private lexicalModel: LexicalModel; - private contextTracker?: correction.ContextTracker; + readonly lexicalModel: LexicalModel; + readonly contextTracker?: correction.ContextTracker; static readonly MAX_SUGGESTIONS = 12; readonly punctuation: LexicalModelPunctuation; @@ -208,9 +208,9 @@ export class ModelCompositor { acceptSuggestion(suggestion: Suggestion, context: Context, postTransform?: Transform): Reversion { // Step 1: generate and save the reversion's Transform. 
- let sourceTransform = suggestion.transform; - let deletedLeftChars = context.left.kmwSubstr(-sourceTransform.deleteLeft, sourceTransform.deleteLeft); - let insertedLength = sourceTransform.insert.kmwLength(); + const sourceTransform = suggestion.transform; + const deletedLeftChars = context.left.kmwSubstr(-sourceTransform.deleteLeft, sourceTransform.deleteLeft); + const insertedLength = sourceTransform.insert.kmwLength(); let reversionTransform: Transform = { insert: deletedLeftChars, @@ -233,14 +233,25 @@ export class ModelCompositor { let postContextTokenization = this.tokenize(postContext); if(postContextTokenization) { // Handles display string for reversions triggered by accepting a suggestion mid-token. - const preCaretToken = postContextTokenization.left[postContextTokenization.left.length - 1]; - revertedPrefix = (preCaretToken && !preCaretToken.isWhitespace) ? preCaretToken.text : ''; + // Also handles display string for reversions applied after previous applications - + // namely, after the whitespace that follows them. + let preCaretToken: models.Token; + const leftContext = postContextTokenization.left; + for(let i = leftContext.length - 1; i >= 0; i--) { + // Applying one suggestion on top of another just-accepted suggestion should result in + // reversions that display the originally-replaced context... not the empty-string + // that follows any added whitespace from either of the accepted suggestions. + if(leftContext[i].text && !leftContext[i].isWhitespace) { + preCaretToken = leftContext[i]; + } + } + revertedPrefix = preCaretToken.text ?? ''; revertedPrefix += postContextTokenization.caretSplitsToken ? 
postContextTokenization.right[0].text : ''; } else { revertedPrefix = this.wordbreak(postContext); } - let firstConversion = models.transformToSuggestion(reversionTransform); + const firstConversion = models.transformToSuggestion(reversionTransform); firstConversion.displayAs = revertedPrefix; // Build the actual Reversion, which is technically an annotated Suggestion. From 9fd68d38478176a1e4cddb1a4054c49c82a9ba42 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 17 Jan 2025 12:55:44 +0700 Subject: [PATCH 2/9] feat(web): mark accepted suggestions in tracked context --- .../src/main/correction/context-tracker.ts | 8 +++ .../src/main/model-compositor.ts | 62 +++++++++++++++++-- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts index 3f92808c99..751024ed34 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts @@ -13,6 +13,12 @@ import Suggestion = LexicalModelTypes.Suggestion; import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; +/** + * Indicates that the owning token's 'replacement' represents part of an + * actually-applied suggestion. 
+ */ +export const APPLIED_SUGGESTION_COMPONENT = -2; + function textToCharTransforms(text: string, transformId?: number) { let perCharTransforms: Transform[] = []; @@ -56,6 +62,7 @@ export class TrackedContextToken { transformDistributions: Distribution[] = []; replacements: TrackedContextSuggestion[] = []; activeReplacementId: number = -1; + replacementTransformId: number = -1; constructor(); constructor(instance: TrackedContextToken); @@ -86,6 +93,7 @@ export class TrackedContextToken { clearReplacements() { this.activeReplacementId = -1; this.replacements = [] + this.replacementTransformId = -1; } /** diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index 7f0e9687bd..d3ba3bf395 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -1,5 +1,6 @@ import * as models from '@keymanapp/models-templates'; import * as correction from './correction/index.js' +import APPLIED_SUGGESTION_COMPONENT = correction.APPLIED_SUGGESTION_COMPONENT; import TransformUtils from './transformUtils.js'; import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; @@ -14,6 +15,7 @@ import Reversion = LexicalModelTypes.Reversion; import Suggestion = LexicalModelTypes.Suggestion; import Transform = LexicalModelTypes.Transform; import USVString = LexicalModelTypes.USVString; +import { tokenizeTransform } from './correction/transform-tokenization.js'; export class ModelCompositor { readonly lexicalModel: LexicalModel; @@ -277,15 +279,67 @@ export class ModelCompositor { if(!contextState) { contextState = this.contextTracker.analyzeState(this.lexicalModel, context).state; } - - contextState.tail.activeReplacementId = suggestion.id; - let 
acceptedContext = models.applyTransform(suggestion.transform, context); - this.contextTracker.analyzeState(this.lexicalModel, acceptedContext); + this.markAcceptedSuggestion(suggestion, context, contextState); } return reversion; } + markAcceptedSuggestion(suggestion: Suggestion, context: Context, contextState: correction.TrackedContextState) { + // Suggestion IDs at this level are unique. + contextState.tail.activeReplacementId = suggestion.id; + contextState.tail.replacementTransformId = suggestion.transformId; + contextState.taggedContext = context; + + // Need to mark the 'accepted' aspect of this in some way... + // We know which state it should match; all there is to do is actually do the bookkeeping. + const matchResults = this.contextTracker.analyzeState( + this.lexicalModel, + context, + [{ + sample: suggestion.transform, + p: 1.0 + }], + true + ); + + // If context-tracking handles the applied suggestion properly... + if(matchResults?.baseState) { + // Get the index of the first token altered by the suggestion being applied. + let substitutionTokenIndex = (contextState.tokens.length - 1) - matchResults.headTokensRemoved; + + const tokenizer = determineModelTokenizer(this.lexicalModel) + let tokenizedApplication = tokenizeTransform(tokenizer, context, suggestion.transform); + + // We build our suggestions to do whole-word replacement. Fortunately, that means we already have + // the full suggestions! + const suggestions = contextState.tail.replacements; + if(suggestions && (substitutionTokenIndex + tokenizedApplication.length == matchResults.state.tokens.length)) { + + for(let j = 1; j <= tokenizedApplication.length; j++) { + const replacementPortion: correction.TrackedContextSuggestion = { + suggestion: { + ...suggestion, + id: APPLIED_SUGGESTION_COMPONENT, // Actual suggestions always present non-negative IDs; this can uniquely mark. + transformId: suggestion.transformId, + // this as a component of an applied suggestion. 
+ transform: tokenizedApplication.slice(0, j).reduce((accum, current) => models.buildMergedTransform(accum, current), { insert: '', deleteLeft: 0}), + }, + tokenWidth: j + } + + const token = matchResults.state.tokens[substitutionTokenIndex + j - 1]; + // Attach our fragmented version of the suggestion - the part useful for rewinding it - + // as its own suggestion with a distinct, unique ID indicative of this property. + token.replacements = [ replacementPortion ].concat(suggestions); + token.activeReplacementId = APPLIED_SUGGESTION_COMPONENT; // perhaps give unique ID + overwrite the original suggestion ID. + token.replacementTransformId = suggestion.transformId; + } + } + // else: we're not confident we can map the replacement details safely to do reversion later + } + } + async applyReversion(reversion: Reversion, context: Context): Promise { // If we are unable to track context (because the model does not support LexiconTraversal), // we need a "fallback" strategy. From a5c64cd3d5a45ecb5a89ff67e1537b0448ae7205 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 11:00:06 +0700 Subject: [PATCH 3/9] change(web): allow forced preservation for matched context states --- .../src/main/correction/context-tracker.ts | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts index 751024ed34..71b4c913b6 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tracker.ts @@ -355,8 +355,17 @@ export class ContextTracker extends CircularArray { static attemptMatchContext( tokenizedContext: Token[], matchState: TrackedContextState, - transformSequenceDistribution?: Distribution + transformSequenceDistribution?: Distribution, + preserveMatchState?: 
boolean ): ContextMatchResult { + // By default, we just edit the prior state's tokens and keep 'em. + // - If it's an old token, it shouldn't be altered; why use extra memory? + // - If we're editing the current token via typing, we only wish to consider the new + // version of the matched context. + // + // This changes when applying suggestions, as THEN we wish to remember 'before' vs 'after'. + preserveMatchState ??= false; + // Map the previous tokenized state to an edit-distance friendly version. let matchContext: USVString[] = matchState.toRawTokenization(); @@ -393,6 +402,9 @@ export class ContextTracker extends CircularArray { // If mutations HAVE happened, we have work to do. let state = matchState; + if(preserveMatchState) { + state = new TrackedContextState(state); + } let priorEdit: typeof editPath[0]; let poppedTokenCount = 0; @@ -494,7 +506,11 @@ export class ContextTracker extends CircularArray { } const sourceToken = matchState.tokens[i]; - state.tokens[i - poppedTokenCount] = sourceToken; + if(preserveMatchState) { + state.tokens[i - poppedTokenCount] = new TrackedContextToken(sourceToken); + } else { + state.tokens[i - poppedTokenCount] = sourceToken; + } const token = state.tokens[i - poppedTokenCount]; // TODO: I'm beginning to believe that searchSpace should (eventually) be tracked @@ -746,7 +762,7 @@ export class ContextTracker extends CircularArray { continue; } - let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution); + let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution, preserveMatchState); if(result?.state) { // Keep it reasonably current! 
And it's probably fine to have it more than once From cc3513774db490f66b815a56881c5a3440716b35 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 13:45:37 +0700 Subject: [PATCH 4/9] change(web): adds helpers for applied-suggestion edge detection, suggestion from previous context --- .../worker-thread/src/main/predict-helpers.ts | 93 ++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 70f739611b..c16eb4a474 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -2,7 +2,7 @@ import * as models from '@keymanapp/models-templates'; import TransformUtils from './transformUtils.js'; import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; -import { ContextTracker, TrackedContextState } from './correction/context-tracker.js'; +import { APPLIED_SUGGESTION_COMPONENT, ContextTracker, TrackedContextState } from './correction/context-tracker.js'; import { ExecutionTimer } from './correction/execution-timer.js'; import ModelCompositor from './model-compositor.js'; import { LexicalModelTypes } from '@keymanapp/common-types'; @@ -16,6 +16,8 @@ import Reversion = LexicalModelTypes.Reversion; import Suggestion = LexicalModelTypes.Suggestion; import SuggestionTag = LexicalModelTypes.SuggestionTag; import Transform = LexicalModelTypes.Transform; +import { deepCopy } from '@keymanapp/web-utils'; +import { LexicalModelPunctuation } from '../../../../../../../common/web/types/src/lexical-model-types.js'; /* * The functions in this file exist to provide unit-testable stateless components for the @@ -83,6 +85,23 @@ export function tupleDisplayOrderSort(a: CorrectionPredictionTuple, b: Correctio return b.totalProb - 
a.totalProb; } +export function isContextAtAcceptedSuggestionEdge( + postContextState: TrackedContextState +): boolean { + if(!postContextState || postContextState.tail.replacements?.length == 0) { + return false; + } + + // Original suggestion set: acquired. But... they're based on a different context... + const appliedSuggestion = postContextState.tail.replacement; + + if(appliedSuggestion?.suggestion?.id != APPLIED_SUGGESTION_COMPONENT) { + return false; + } + + return true; +} + /** * This method performs the correction-search and model-lookup operations for * prediction generation by using the user's context state and potential @@ -191,6 +210,14 @@ export async function correctAndEnumerate( ); const postContextState = contextChangeAnalysis.state; + if(isContextAtAcceptedSuggestionEdge(postContextState)) { + return { + postContextState: postContextState, + // We're going to ignore whatever we'd generate, so just skip past this phase. + rawPredictions: [] + }; + } + // TODO: Should we filter backspaces & whitespaces out of the transform distribution? // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. @@ -349,6 +376,70 @@ export async function correctAndEnumerate( }; } +/** + * Given a previous context state to which a suggestion was applied, regenerate its + * suggestions based on the current state of the context, altering the 'keep' suggestion + * into a 'revert' suggestion that will restore the original input. + * @param postContextState + * @param inputTransform + * @param punctuation + * @returns + */ +export function suggestFromPriorContext( + postContextState: TrackedContextState, + inputTransform: Transform, + punctuation: LexicalModelPunctuation +) { + const currentToken = postContextState.tail; + + // Original suggestion set: acquired. But... they're based on a different context... 
+ const appliedSuggestion = currentToken.replacement; + + // We added a partial, synthetic 'replacement' useful for tracking how much to revert. + // It should have a set, distinct ID corresponding to the check below. This partial + // version was never a real suggestion; even if it were, the original is still within our list! + const suggestions = currentToken.replacements.filter((entry) => entry != appliedSuggestion).map((entry) => deepCopy(entry)); + + // First up: we can safely use the raw insert string as the needed length to erase. + // Also, we need to include whatever backspacing occurred to reach that point, as the suggestion + // is applied to the context BEFORE the backspace takes effect. + const deleteLeft = appliedSuggestion.suggestion.transform.insert.kmwLength() + inputTransform.deleteLeft; + suggestions.forEach((entry) => { + entry.suggestion.transform.deleteLeft = deleteLeft; + if(inputTransform.id) { + entry.suggestion.transformId = inputTransform.id; + } + }); + + // Note: the input's transform ID is mapped onto each suggestion in the loop above. + + const keepSuggestion = suggestions.find((entry) => entry.suggestion.tag == 'keep'); + // Convert the original 'keep' suggestion into a REVERT suggestion; this will restore the + // original context. + keepSuggestion.suggestion.tag = 'revert'; + if(currentToken.replacementTransformId) { + keepSuggestion.suggestion.transformId = -currentToken.replacementTransformId; + } + + // Remove any keep-added punctuation; we wish to place the caret immediately after the token, + // with no intervening whitespace or similar punctuation insertion. + const postSuggestionInsert = punctuation.insertAfterWord; + if(postSuggestionInsert) { + const keepTransform = keepSuggestion.suggestion.transform; + const keepText = keepTransform.insert; + + // Validate that the punctuation matches first. 
+ const punct = keepText.slice(keepText.length - postSuggestionInsert.length); + if(punct == postSuggestionInsert) { + // remove the whitespace-ish punctuation if it was added! + keepTransform.insert = keepText.slice(0, keepText.length - postSuggestionInsert.length); + } + } + + // Bypass the rest of the correction-search; just use these! + return suggestions.map((entry) => entry.suggestion); +} + export function shouldStopSearchingEarly( bestCorrectionCost: number, currentCorrectionCost: number, From 4e85ebe87d7eb4a038a20a8f68362ee39a8aceef Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 17 Jan 2025 13:05:03 +0700 Subject: [PATCH 5/9] feat(web): integrate new predictive-text helpers --- .../worker-thread/src/main/model-compositor.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index d3ba3bf395..a033e14742 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -3,7 +3,7 @@ import * as correction from './correction/index.js' import APPLIED_SUGGESTION_COMPONENT = correction.APPLIED_SUGGESTION_COMPONENT; import TransformUtils from './transformUtils.js'; -import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; +import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, isContextAtAcceptedSuggestionEdge, predictionAutoSelect, processSimilarity, suggestFromPriorContext, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; import { LexicalModelTypes } from 
'@keymanapp/common-types'; import CasingForm = LexicalModelTypes.CasingForm; @@ -132,6 +132,11 @@ export class ModelCompositor { const { postContextState, rawPredictions } = await correctAndEnumerate(this.contextTracker, this.lexicalModel, timer, transformDistribution, context); + // Check: did we just reach the tail of a prior token via BKSP? + if(isContextAtAcceptedSuggestionEdge(postContextState)) { + return suggestFromPriorContext(postContextState, inputTransform, this.punctuation); + } + if(this.activeTimer == timer) { this.activeTimer = null; } From bc78e893d5a3263b416acc079fd12a3d88aa1153 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 16 Jan 2025 12:13:04 +0700 Subject: [PATCH 6/9] change(web): suggestion-banner logic for handling delayed reversions --- .../src/prediction/predictionContext.ts | 15 +++++++++++++-- .../main/src/headless/transcriptionCache.ts | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/web/src/engine/interfaces/src/prediction/predictionContext.ts b/web/src/engine/interfaces/src/prediction/predictionContext.ts index ec632a5058..0abcd2452b 100644 --- a/web/src/engine/interfaces/src/prediction/predictionContext.ts +++ b/web/src/engine/interfaces/src/prediction/predictionContext.ts @@ -319,9 +319,14 @@ export default class PredictionContext extends EventEmitter