Skip to content

Commit

Permalink
Merge pull request #9687 from keymanapp/feat/core/9119-more-markers-e…
Browse files Browse the repository at this point in the history
…pic-ldml
  • Loading branch information
srl295 authored Oct 6, 2023
2 parents 692be91 + 25c20ee commit f3fd5aa
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 13 deletions.
4 changes: 2 additions & 2 deletions common/web/types/src/kmx/kmx-plus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ export class Vars extends Section {
return v[0];
}
}
substituteMarkerString(s : string) : string {
return MarkerParser.toSentinelString(s, this.markers);
substituteMarkerString(s : string, forMatch? : boolean) : string {
return MarkerParser.toSentinelString(s, this.markers, forMatch);
}
};

Expand Down
18 changes: 15 additions & 3 deletions common/web/types/src/ldml-keyboard/pattern-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ export class MarkerParser {
/** Max count of markers */
public static readonly MAX_MARKER_COUNT = constants.marker_max_count;

/**
 * Builds the expression text that matches any single marker: the sentinel,
 * the marker code, then a character class whose bounds are
 * MIN_MARKER_INDEX and MAX_MARKER_INDEX written as \uXXXX escapes.
 * @returns the expression source string
 */
private static anyMarkerMatch() : string {
  // Keep only the last four hex digits, zero-padded on the left, as \uXXXX requires.
  const hex4 = (index: number) : string => (`0000` + index.toString(16)).slice(-4);
  const lowest = hex4(this.MIN_MARKER_INDEX);
  const highest = hex4(this.MAX_MARKER_INDEX);
  const indexRange = `[\\u${lowest}-\\u${highest}]`;
  return `${this.SENTINEL}${this.MARKER_CODE}${indexRange}`;
}

/**
 * Expression that matches any marker.
 *
 * Computed once, at class initialization, by anyMarkerMatch(). It is the text
 * that toSentinelString() substitutes for the "any marker" reference when its
 * forMatch argument is truthy.
 *
 * NOTE(review): the static members read by anyMarkerMatch() must already be
 * initialized when this line runs — confirm they are declared above it.
 */
public static readonly ANY_MARKER_MATCH = MarkerParser.anyMarkerMatch();

/**
* Pattern for matching a marker reference, OR the special marker \m{.}
*/
Expand All @@ -91,10 +100,13 @@ export class MarkerParser {
}

/** @returns all marker strings as sentinel values */
public static toSentinelString(s: string, markers?: OrderedStringList) : string {
public static toSentinelString(s: string, markers?: OrderedStringList, forMatch?: boolean) : string {
if (!s) return s;
return s.replaceAll(this.REFERENCE, (sub, arg) => {
if (arg === MarkerParser.ANY_MARKER_ID) {
if (forMatch) {
return this.ANY_MARKER_MATCH;
}
return MarkerParser.markerOutput(MarkerParser.ANY_MARKER_INDEX);
}
if (!markers) {
Expand All @@ -103,10 +115,10 @@ export class MarkerParser {
const order = markers.getItemOrder(arg);
if (order === -1) {
throw RangeError(`Internal Error: Could not find marker \\m{${arg}}`);
} else if(order >= MarkerParser.MAX_MARKER_INDEX) {
} else if(order > MarkerParser.MAX_MARKER_INDEX) {
throw RangeError(`Internal Error: marker \\m{${arg}} has out of range index ${order}`);
} else {
return MarkerParser.markerOutput(order+1);
return MarkerParser.markerOutput(order + 1);
}
});
}
Expand Down
16 changes: 16 additions & 0 deletions common/web/types/test/ldml-keyboard/test-pattern-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ describe('Test of Pattern Parsers', () => {
'a': 0,
'b': 1,
'c': 2,
'zz': MarkerParser.MAX_MARKER_INDEX - 1, // this is an ordering, so needs to be -1
'zzz': 0x2FFFFF,
};
const o = m[item];
Expand Down Expand Up @@ -103,6 +104,21 @@ describe('Test of Pattern Parsers', () => {
markers
)
);
// verify the matching behavior of these
assert.isTrue(new RegExp(MarkerParser.toSentinelString(`^Q\\m{a}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`Q\\m{a}`, markers, false)), `Q\\m{a} did not match`);
assert.isFalse(new RegExp(MarkerParser.toSentinelString(`^Q\\m{a}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`Q\\m{b}`, markers, false)), `Q\\m{a} should not match Q\\m{b}`);
assert.isTrue(new RegExp(MarkerParser.toSentinelString(`^Q\\m{.}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`Q\\m{a}`, markers, false)), `Q\\m{.} did not match Q\\m{a}`);
assert.isTrue(new RegExp(MarkerParser.toSentinelString(`^Q\\m{.}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`Q\\m{zz}`, markers, false)), `Q\\m{.} did not match Q\\m{zz} (max marker)`);
assert.isFalse(new RegExp(MarkerParser.toSentinelString(`^Q\\m{.}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`\\m{a}`, markers, false)), `Q\\m{.} did not match \\m{a}`);
assert.isTrue(new RegExp(MarkerParser.toSentinelString(`^\\m{.}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`\\m{a}`, markers, false)), `\\m{.} did not match \\m{a}`);
assert.isFalse(new RegExp(MarkerParser.toSentinelString(`^\\m{.}$`, markers, true), 'u')
.test(MarkerParser.toSentinelString(`\\m{a}\\m{b}`, markers, false)), `\\m{.} did not match \\m{a}\\m{b}`);
});
it('should match some marker constants', () => {
assert.equal(constants.uc_sentinel, KMXFile.UC_SENTINEL);
Expand Down
2 changes: 2 additions & 0 deletions core/src/ldml/C9134_ldml_markers.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Note that this is different from other 0-based indices in KMX+. If there are thr
## Compiler (kmc)

- `U+FFFF` needs to be illegal as a literal or escaped sequence. So `\u{FFFF}` is not allowed, for example, nor as a literal in the UTF-8 .xml stream.
- Matching `\m{abc}` (a specific marker) will turn into a match for the sequence `U+FFFF U+0008 U+XXXX`, where `U+XXXX` encodes that marker's index.
- Matching `\m{.}` (_any_ marker) will turn into the special sequence `U+FFFF U+0008 [U+0001-U+D7FE]`, where the final element is a character range that matches any marker index.

### `vars`

Expand Down
26 changes: 20 additions & 6 deletions core/tests/unit/ldml/keyboards/k_210_marker-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@
<keystroke key="acute" />
<check result="+" />
</test>
<test name="marker-test-trailing-acute">
<startContext to="" />
<keystroke key="acute" />
<!-- TODO-LDML: broken, because without a rule to 'clean up' trailing acute, we don't currently have code to fix it -->
<check result="" />
</test>
<test name="marker-test-trailing-grave">
<startContext to="" />
<keystroke key="grave" />
Expand All @@ -42,5 +36,25 @@
<keystroke key="acute" />
<check result="é" />
</test>
<test name="marker-test-any-marker">
<startContext to="" />
<keystroke key="acute" />
<keystroke key="z" />
<check result="Z" />
</test>
<test name="marker-test-trailing-acute">
<startContext to="" />
<keystroke key="acute" />
<check result="" />
<keystroke key="x" />
<check result="x" />
</test>
<test name="marker-test-trailing-acute2">
<startContext to="" />
<keystroke key="x" />
<check result="x" />
<keystroke key="acute" />
<check result="x" />
</test>
</tests>
</keyboardTest3>
3 changes: 3 additions & 0 deletions core/tests/unit/ldml/keyboards/k_210_marker.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
</layers>

<transforms type="simple">
<transformGroup>
<transform from="\m{.}z" to="Z" />
</transformGroup>
<transformGroup>
<transform from="C" to="\m{caret}" />
<transform from="H" to="\m{hacek}" />
Expand Down
4 changes: 2 additions & 2 deletions developer/src/kmc-ldml/src/compiler/tran.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ export class TransformCompiler<T extends TransformCompilerType, TranBase extends
}

// add in markers. idempotent if no markers.
cookedFrom = sections.vars.substituteMarkerString(cookedFrom); // TODO-LDML: need to support \m{.} here, maybe other edge cases
cookedTo = sections.vars.substituteMarkerString(cookedTo);
cookedFrom = sections.vars.substituteMarkerString(cookedFrom, true); // TODO-LDML: need to support \m{.} here, maybe other edge cases
cookedTo = sections.vars.substituteMarkerString(cookedTo, false);

result.from = sections.strs.allocAndUnescapeString(cookedFrom); // TODO-LDML: not unescaped here, done previously
result.to = sections.strs.allocAndUnescapeString(cookedTo); // TODO-LDML: not unescaped here, done previously
Expand Down

0 comments on commit f3fd5aa

Please sign in to comment.