diff --git a/docs/ldml/tr35-keyboards.md b/docs/ldml/tr35-keyboards.md
index 8dabaedf22c..af088c64346 100644
--- a/docs/ldml/tr35-keyboards.md
+++ b/docs/ldml/tr35-keyboards.md
@@ -2908,6 +2908,196 @@ This attribute specifies the expected resultant text in a document after process
+## Appendix: Pseudocode for Marker-safe Normalization
+The following is an informative section with some example pseudocode for marker-safe normalization into NFD.
+1. Markers are not text. This pseudocode assumes an implementation which is able to distinguish markers from plain text.
+2. Some optimization notes are given, but the emphasis here is on a clear and correct implementation.
+3. A distinction would need to be made between (A) markers in text, such as a keystroke emitting `\m{someMarker}` or a marker in input context, and (B) markers in transform match strings, such as `<transform from="\m{someMarker}"/>` or `<transform from="\m{.}"/>`. An implementation could pass a parameter through the following code path to distinguish A from B; this distinction is not shown here, for clarity.
+Example use:
+- `nfdMarkers('e\u{0300}' + Marker('marker1') + '\u{0320}a\u{0300}' + Marker('marker2') + '\u{0320}')`
+- Returns: `'e' + Marker('marker1') + '\u{0320}\u{0300}a' + Marker('marker2') + '\u{0320}\u{0300}'`
+/**
+ * Normalize a string-with-markers into NFD.
+ *
+ * The input is cut into segments at positions that have an NFD boundary
+ * before them (and are not markers); each segment is normalized
+ * independently by nfdMarkersSegment() and the results are concatenated.
+ *
+ * @param input string with markers as out-of-band
+ * @returns normalized output as a string with markers
+ */
+export function nfdMarkers(input /* string with markers */) {
+  // to accumulate output text
+  let output = "";
+  // remove all markers
+  const noMarkers = removeMarkers(input);
+  // NFD the no-marker text.
+  const noMarkersNfd = normalizeNFD(noMarkers);
+  // Two quick checks.
+  if (noMarkers === input) {
+    // Heuristic: if there were no markers we can safely return noMarkersNfd
+    return noMarkersNfd;
+  } else if (noMarkersNfd === noMarkers) {
+    // Heuristic: if normalization made no change, return the input string: already NFD
+    return input;
+  }
+  let segmentStart = 0;
+  let segmentEnd = 0;
+  // look for normalization-safe segments
+  // 'i' = iterate by codepoints
+  let i = input.begin;
+  do {
+    const haveMarker = markerAt(i); // true if there is a marker here
+    if (i === input.end) {
+      segmentEnd = i; // end of input - end of segment
+    } else if (hasNFDBoundaryBefore(i) && !haveMarker) {
+      segmentEnd = i; // end of segment
+      i++; // move past the NFD char
+    } else if (haveMarker) {
+      i += /* marker length */; // skip past marker
+    } else {
+      // non boundary, just move index forward
+      i++;
+    }
+    // Flush only when the boundary has actually advanced, so that an
+    // empty segment is never passed on for normalization.
+    if (segmentStart !== segmentEnd) {
+      const segment = input.substring(segmentStart, segmentEnd);
+      const segmentNfd = nfdMarkersSegment(segment);
+      output = output + segmentNfd;
+      segmentStart = segmentEnd;
+    }
+  } while (segmentEnd !== input.end);
+  return output;
+}
+/**
+ * Normalize a single segment (as cut out by the caller) into NFD,
+ * keeping its markers.
+ *
+ * @param input segment text
+ * @returns normalized output segment
+ */
+function nfdMarkersSegment(input /* string with markers */) {
+  const noMarkers = removeMarkers(input);
+  const markerMap = parseMarkers(input);
+  const noMarkersNfd = normalizeNFD(noMarkers);
+  if (markerMap.empty()) {
+    // no markers in this segment.
+    return noMarkersNfd;
+  } else if (noMarkersNfd === noMarkers) {
+    // no change in this segment
+    return input;
+  } else {
+    // text changed and markers are present: re-attach each marker
+    // to the normalized text
+    return addBackMarkers(noMarkersNfd, markerMap);
+  }
+}
+// represents a removed marker, to be re-added
+// (pseudocode: `= false` denotes the value a field has when it is omitted)
+interface MarkerEntry {
+  glue: string; // 'glue' character; not set on end-of-text entries
+  marker: Marker; // which marker
+  processed: boolean = false; // track whether this was processed or not
+  eot: boolean = false; // track whether end of text or not
+}
+/**
+ * parse the string for markers
+ *
+ * Each marker is recorded together with its 'glue': the first codepoint of
+ * the NFD form of the next non-marker codepoint. Markers with no following
+ * codepoint are recorded as end-of-text (eot) entries.
+ *
+ * @param s string with markers
+ * @returns the marker entries, in the order the markers appear in s
+ */
+function parseMarkers(s: string): MarkerEntry[] {
+  // Note: parseMarkers() and removeMarkers() (not shown) could be the same function.
+  // queue of markers found
+  const lastMarkers: Marker[] = [];
+  // return value
+  const outputMap: MarkerEntry[] = [];
+  // iterate string by codepoints
+  for (let i = s.begin; i < s.end;) {
+    if (markerAt(i)) {
+      // found a marker - add it to the list
+      lastMarkers.add(markerAt(i));
+      i += /* marker length */;
+    } else {
+      // From 'i' find the glue char for all markers found so far
+      // (in order)
+      // use first char of decomposed string
+      const glue = normalizeNFD(i).codePointAt(0);
+      for (const m of lastMarkers) {
+        outputMap.add({marker: m, glue: glue});
+      }
+      lastMarkers.clear();
+      i++; // move to next codepoint
+    }
+  }
+  // ran out of codepoints.
+  // any remaining markers are EOT
+  for (const m of lastMarkers) {
+    outputMap.add({marker: m, eot: true});
+  }
+  return outputMap;
+}
+/**
+ * re-add markers to input. Mutates map.
+ *
+ * The output is built from the end of the text towards the beginning:
+ * end-of-text markers first, then each codepoint followed by (i.e. prefixed
+ * with) the markers glued to it.
+ *
+ * @param input normalized string to re-add markers
+ * @param map list of glued markers. will be mutated
+ * @returns the input text with the markers re-inserted
+ */
+function addBackMarkers(input : string, map: MarkerEntry[]) : stringWithMarkers {
+  // quick check
+  if (map.empty()) return input;
+  // output string
+  let out = '';
+  // first, add all of the EOT entries
+  while (!map.empty() && map[map.length - 1].eot) {
+    const m = map.popEnd(); // remove the last entry
+    out = m.marker + out; // prepend the marker
+  }
+  // now, process input codepoints from end to beginning.
+  // input.end is treated as one past the last codepoint, so start one before it.
+  for (let i = input.end - 1; i >= input.begin; i--) {
+    // 0. prepend this codepoint
+    out = i + out;
+    // 1. any markers at the end of list which match get added
+    while (!map.empty() && map[map.length - 1].glue == i) {
+      // remove last entry, so we don't have to process it any more.
+      const m = map.popEnd();
+      if (!m.processed) {
+        out = m.marker + out; // prepend marker
+      } // else we already processed it
+    }
+    // 2. look for any out of order markers, not right at the end.
+    // The last entry was already examined by step 1, so start one before it.
+    for (let m2 = map.length - 2; m2 >= 0; m2--) {
+      const m = map[m2]; // peek at an earlier entry without removing it
+      if (m.glue == i && !m.processed) {
+        m.processed = true; // so we don't process it again
+        out = m.marker + out; // prepend marker
+      }
+    }
+  }
+  return out;
+}
+// Normalization functions (to be provided)
+// Only signatures are given here; no bodies are part of this pseudocode.
+function isNFD(s): boolean; // return true if in NFD
+function getCanonicalCombiningClass(c): int; // return the CCC (canonical combining class) according to UAX#44
+function normalizeNFD(s): string; // return NFD of a string
+// other functions (not shown)
+function removeMarkers(s): string; // strip out all markers from a string
+/** indicates whether a character does NOT interact with prior chars in
+ * normalization. See hasBoundaryBefore() in ICU.
+ *
+ * NOTE(review): the parameter is named `s`, but it is used as a single
+ * character/position rather than a whole string - confirm against callers.
+ *
+ * @param s the character to test
+ * @returns true if s is already in NFD and its combining class is 0
+ */
+function hasNFDBoundaryBefore(s: string): boolean {
+  // See UAX #15
+  return (isNFD(s) && getCanonicalCombiningClass(s) === 0);
+}
* * *