Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(web): support reversion of accepted suggestions after edits #12884

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ import Suggestion = LexicalModelTypes.Suggestion;
import Transform = LexicalModelTypes.Transform;
import USVString = LexicalModelTypes.USVString;

/**
* Sentinel suggestion ID indicating that the owning token's 'replacement'
* represents part of an actually-applied suggestion, rather than a
* still-pending one.
*
* Genuine suggestions always carry non-negative IDs, so this negative
* sentinel can never collide with a real suggestion's ID.
*/
export const APPLIED_SUGGESTION_COMPONENT = -2;

function textToCharTransforms(text: string, transformId?: number) {
let perCharTransforms: Transform[] = [];

Expand Down Expand Up @@ -42,9 +48,15 @@ export class TrackedContextToken {
isWhitespace?: boolean;

transformDistributions: Distribution<Transform>[] = [];
replacements: TrackedContextSuggestion[];
replacements: TrackedContextSuggestion[] = [];
activeReplacementId: number = -1;

/**
* Constructs a new tracked token, optionally cloning an existing instance.
*
* NOTE(review): `Object.assign` performs a *shallow* copy — array-valued
* fields such as `replacements` and `transformDistributions` remain shared
* with `instance` until a caller reassigns them wholesale. Confirm that all
* callers reassign (rather than mutate in place) after cloning.
* When called with no argument, `Object.assign(this, undefined)` is a no-op
* and the field initializers' defaults stand.
*/
constructor();
constructor(instance: TrackedContextToken);
constructor(instance?: TrackedContextToken) {
Object.assign(this, instance);
}

get currentText(): string {
if(this.replacementText === undefined || this.replacementText === null) {
return this.raw;
Expand Down Expand Up @@ -85,7 +97,9 @@ export class TrackedContextToken {
});

this.raw = tokenText;
this.transformDistributions = backspacedTokenContext;
this.transformDistributions = backspacedTokenContext;
this.activeReplacementId = null;
this.replacements = []
}

update(transformDistribution: Distribution<Transform>, tokenText?: USVString) {
Expand All @@ -98,6 +112,8 @@ export class TrackedContextToken {

// Replace old token's raw-text with new token's raw-text.
this.raw = tokenText;
this.activeReplacementId = null;
this.replacements = []
}
}

Expand Down Expand Up @@ -314,14 +330,20 @@ interface ContextMatchResult {
* Should always be non-null if the token before the caret did not previously exist.
*/
preservationTransform?: Transform;

headTokensRemoved: number;
tailTokensAdded: number;
}

export class ContextTracker extends CircularArray<TrackedContextState> {
static attemptMatchContext(
tokenizedContext: Token[],
matchState: TrackedContextState,
transformSequenceDistribution?: Distribution<Transform[]>
transformSequenceDistribution?: Distribution<Transform[]>,
isApplyingSuggestion?: boolean
): ContextMatchResult {
isApplyingSuggestion ??= false;

// Map the previous tokenized state to an edit-distance friendly version.
let matchContext: USVString[] = matchState.toRawTokenization();

Expand All @@ -337,14 +359,18 @@ export class ContextTracker extends CircularArray<TrackedContextState> {

let editPath = mapping.editPath();

// When the context has but two tokens, the path algorithm tends to invert
// When the context has but three tokens, the path algorithm tends to invert
// 'insert' and 'substitute' from our preferred ordering for them.
// Logically, either order makes sense... but logic for other cases is
// far simpler if we have 'substitute' before 'insert'.
if(editPath.length == 2 && editPath[0] == 'insert' && editPath[1] == 'substitute') {
if(editPath.length == 3 && editPath[0] == 'insert' && editPath[1] == 'insert' && editPath[2] == 'substitute') {
editPath[0] = 'substitute';
editPath[2] = 'insert';
} else if(editPath.length == 2 && editPath[0] == 'insert' && editPath[1] == 'substitute') {
// This path may be reachable for languages without space-based wordbreaking.
editPath[0] = 'substitute';
editPath[1] = 'insert';
}
}

const firstMatch = editPath.indexOf('match');
let lastMatch = editPath.lastIndexOf('match');
Expand All @@ -370,11 +396,14 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
// If we have a perfect match with a pre-existing context, no mutations have
// happened; just re-use the old context state.
if(firstMatch == 0 && lastMatch == editPath.length - 1) {
return { state: matchState, baseState: matchState };
return { state: matchState, baseState: matchState, headTokensRemoved: 0, tailTokensAdded: 0 };
}

// If mutations HAVE happened, we have work to do.
let state = matchState;
if(isApplyingSuggestion) {
state = new TrackedContextState(state);
}

let priorEdit: typeof editPath[0];
let poppedTokenCount = 0;
Expand Down Expand Up @@ -412,6 +441,7 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
// does not land as part of the final token in the resulting context. This
// component should be preserved by any suggestions that get applied.
let preservationTransform: Transform;
let pushedTokenCount = 0;

// Now to update the end of the context window.
for(let i = lastMatch+1; i < editPath.length; i++) {
Expand Down Expand Up @@ -459,7 +489,10 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
}

const token = state.tokens[i - poppedTokenCount];
const matchToken = matchState.tokens[i];
const matchToken = isApplyingSuggestion ? new TrackedContextToken(matchState.tokens[i]) : matchState.tokens[i];
if(isApplyingSuggestion) {
state.tokens[i - poppedTokenCount] = matchToken;
}

// TODO: I'm beginning to believe that searchSpace should (eventually) be tracked
// on the tokens, rather than on the overall 'state'.
Expand Down Expand Up @@ -540,6 +573,7 @@ export class ContextTracker extends CircularArray<TrackedContextState> {

// Auto-replaces the search space to correspond with the new token.
state.pushTail(pushedToken);
pushedTokenCount++;
break;
case 'match':
// The default (Unicode) wordbreaker returns an empty token after whitespace blocks.
Expand All @@ -550,21 +584,47 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
continue;
}
// else 'fallthrough' / return null
default:
// No 'delete' should exist on the trailing edge of context when the
// context window slides. While it can happen due to keystrokes with
// `deleteLeft`, we keep a cache of recent contexts - an older one will
// likely match sufficiently.
// - may see 'delete' followed by 'substitute' in such cases.
case 'delete':
// While we do keep a cache of recent contexts, logic constraints for handling
// multitaps makes it tricky to reliably use in all situations.
// It's best to handle `delete` cases directly for this reason.
for(let j = i + 1; j < editPath.length; j++) {
// If something _other_ than delete follows a 'delete' on the edit path,
// we probably have a context mismatch.
//
// It's possible to construct cases where this isn't true, but it's likely not
// worth trying to handle such rare cases.
if(editPath[j] != 'delete') {
return null;
}
}

// If ALL that remains are deletes, we're good to go.
//
// No 'transform' edits should exist within this section, either.
// This may not be the token at the index, but since all that remains are deletes,
// we'll have deleted the correct total number from the end once all iterations
// are done.
if(state == matchState) {
state = new TrackedContextState(state);
}

state.tokens.pop();
break;
default:
// No 'transform' edits should exist within this section.
return null;
}

priorEdit = editPath[i];
}

return { state, baseState: matchState, preservationTransform };
return {
state,
baseState: matchState,
preservationTransform,
headTokensRemoved: poppedTokenCount,
tailTokensAdded: pushedTokenCount
};
}

private static modelContextState(
Expand Down Expand Up @@ -623,7 +683,8 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
analyzeState(
model: LexicalModel,
context: Context,
transformDistribution?: Distribution<Transform>
transformDistribution?: Distribution<Transform>,
isApplyingSuggestion?: boolean
): ContextMatchResult {
if(!model.traverseFromRoot) {
// Assumption: LexicalModel provides a valid traverseFromRoot function. (Is technically optional)
Expand Down Expand Up @@ -676,7 +737,7 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
continue;
}

let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution);
let result = ContextTracker.attemptMatchContext(tokenizedContext.left, this.item(i), tokenizedDistribution, isApplyingSuggestion);

if(result?.state) {
// Keep it reasonably current! And it's probably fine to have it more than once
Expand Down Expand Up @@ -704,7 +765,7 @@ export class ContextTracker extends CircularArray<TrackedContextState> {
let state = ContextTracker.modelContextState(tokenizedContext.left, model);
state.taggedContext = context;
this.enqueue(state);
return { state, baseState: null };
return { state, baseState: null, headTokensRemoved: 0, tailTokensAdded: 0 };
}

clearCache() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as models from '@keymanapp/models-templates';
import * as correction from './correction/index.js'
import APPLIED_SUGGESTION_COMPONENT = correction.APPLIED_SUGGESTION_COMPONENT;

import TransformUtils from './transformUtils.js';
import { correctAndEnumerate, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js';
Expand All @@ -14,10 +15,12 @@ import Reversion = LexicalModelTypes.Reversion;
import Suggestion = LexicalModelTypes.Suggestion;
import Transform = LexicalModelTypes.Transform;
import USVString = LexicalModelTypes.USVString;
import { tokenizeTransform } from './correction/transform-tokenization.js';
import { deepCopy } from '@keymanapp/web-utils';

export class ModelCompositor {
private lexicalModel: LexicalModel;
private contextTracker?: correction.ContextTracker;
readonly lexicalModel: LexicalModel;
readonly contextTracker?: correction.ContextTracker;

static readonly MAX_SUGGESTIONS = 12;
readonly punctuation: LexicalModelPunctuation;
Expand Down Expand Up @@ -129,6 +132,41 @@ export class ModelCompositor {
const timer = this.activeTimer = new correction.ExecutionTimer(this.testMode ? Number.MAX_VALUE : SEARCH_TIMEOUT, this.testMode ? Number.MAX_VALUE : SEARCH_TIMEOUT * 1.5);

const { postContextState, rawPredictions } = await correctAndEnumerate(this.contextTracker, this.lexicalModel, timer, transformDistribution, context);

// TODO: maybe we should find a way to duplicate the checks below (re revertible contexts)
// and use them inside `correctAndEnumerate` - why burn literal milliseconds of compute
// time when we know we'll throw those results away?

// Check: did we just reach the tail of a prior token via BKSP?
if(postContextState.tail.replacements?.length > 0) {
// Bypass standard suggestion stuff; re-offer the old suggestions + display a revert option.
const currentToken = postContextState.tail;

// Original suggestion set: acquired. But... they're based on a different context...
const appliedSuggestion = currentToken?.replacement;

// We added a partial, synthetic 'replacement' useful for tracking how much to revert.
// It should have a set, distinct ID corresponding to the check below. This partial
// version was never a real suggestion; even if it were, the original is still within our list!
const suggestions = currentToken.replacements.filter((entry) => entry != appliedSuggestion).map((entry) => deepCopy(entry));

// Verify: were we successfully able to set up a 'rewind' / revertible scenario?
if(appliedSuggestion?.suggestion?.id == APPLIED_SUGGESTION_COMPONENT) {
// We were? It's GO TIME!

// First up: we can safely use the raw insert string as the needed length to erase.
const deleteLeft = appliedSuggestion.suggestion.transform.insert.kmwLength();
suggestions.forEach((entry) => entry.suggestion.transform.deleteLeft = deleteLeft);

const keepSuggestion = suggestions.find((entry) => entry.suggestion.tag == 'keep');
// Convert the original 'keep' suggestion into a REVERT suggestion; this will restore the
// original context.
keepSuggestion.suggestion.tag = 'revert';

// Bypass the rest of the correction-search; just use these!
return suggestions.map((entry) => entry.suggestion);
}
}

if(this.activeTimer == timer) {
this.activeTimer = null;
Expand Down Expand Up @@ -267,9 +305,57 @@ export class ModelCompositor {
contextState = this.contextTracker.analyzeState(this.lexicalModel, context).state;
}

// Suggestion IDs at this level are unique.
contextState.tail.activeReplacementId = suggestion.id;
let acceptedContext = models.applyTransform(suggestion.transform, context);
this.contextTracker.analyzeState(this.lexicalModel, acceptedContext);
contextState.taggedContext = context;

// Hmm... need to mark the 'accepted' aspect of this in some way...
// Maybe on the state itself? No, won't work - not after multiple BKSP.
//
// We know which state it should match; all there is to do is actually do the bookkeeping.
const matchResults = this.contextTracker.analyzeState(
this.lexicalModel,
context,
[{
sample: suggestion.transform,
p: 1.0
}],
true
);

// If context-tracking handles the applied suggestion properly...
if(matchResults?.baseState) {
// Get the index of the first token altered by the suggestion being applied.
let substitutionTokenIndex = (contextState.tokens.length - 1) - matchResults.headTokensRemoved;

const tokenizer = determineModelTokenizer(this.lexicalModel)
let tokenizedApplication = tokenizeTransform(tokenizer, context, suggestion.transform);

// We build our suggestions to do whole-word replacement. Fortunately, that means we already have
// the full suggestions!
const suggestions = contextState.tail.replacements;
if(suggestions && (substitutionTokenIndex + tokenizedApplication.length == matchResults.state.tokens.length)) {

for(let j = 1; j <= tokenizedApplication.length; j++) {
const replacementPortion: correction.TrackedContextSuggestion = {
suggestion: {
...suggestion,
id: APPLIED_SUGGESTION_COMPONENT, // Actual suggestions always present non-negative IDs; this can uniquely mark
// this as a component of an applied suggestion.
transform: tokenizedApplication.slice(0, j).reduce((accum, current) => models.buildMergedTransform(accum, current), { insert: '', deleteLeft: 0}),
},
tokenWidth: j
}

const token = matchResults.state.tokens[substitutionTokenIndex + j - 1];
// Attach our fragmented version of the suggestion - the part useful for rewinding it -
// as its own suggestion with a distinct, unique ID indicative of this property.
token.replacements = [ replacementPortion ].concat(suggestions);
token.activeReplacementId = APPLIED_SUGGESTION_COMPONENT; // perhaps give unique ID + overwrite the original suggestion ID.
}
}
// else: we're not confident we can map the replacement details safely to do reversion later
}
}

return reversion;
Expand Down
Loading
Loading