From e07314141b8d3348611571c7c5d219ddeee17139 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Tue, 13 Aug 2024 18:09:54 -0700
Subject: [PATCH] make CLDR radical-stroke order = UAX38

---
 .../CollationTest_NON_IGNORABLE.txt           |   2 +-
 .../CollationTest_NON_IGNORABLE_SHORT.txt     |   2 +-
 .../CollationTest/CollationTest_SHIFTED.txt   |   2 +-
 .../CollationTest_SHIFTED_SHORT.txt           |   2 +-
 .../draft/GenerateUnihanCollators.java        |  79 ++---
 .../org/unicode/text/UCA/RadicalStroke.java   | 314 +++++++++---------
 6 files changed, 195 insertions(+), 206 deletions(-)

diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
index 4de1b642a..fde4f9169 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
@@ -1,5 +1,5 @@
 # CollationTest_NON_IGNORABLE.txt
-# Date: 2024-06-05, 18:49:37 GMT
+# Date: 2024-08-14, 00:51:38 GMT
 # © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
index d03e4ee81..6cedac715 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
@@ -1,5 +1,5 @@
 # CollationTest_NON_IGNORABLE_SHORT.txt
-# Date: 2024-06-05, 18:49:39 GMT
+# Date: 2024-08-14, 00:51:39 GMT
 # © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
index 5ba2ce5ae..13abb6b2b 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
@@ -1,5 +1,5 @@
 # CollationTest_SHIFTED.txt
-# Date: 2024-06-05, 18:49:40 GMT
+# Date: 2024-08-14, 00:51:40 GMT
 # © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
index 4d1117edc..6469a5625 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
@@ -1,5 +1,5 @@
 # CollationTest_SHIFTED_SHORT.txt
-# Date: 2024-06-05, 18:49:41 GMT
+# Date: 2024-08-14, 00:51:41 GMT
 # © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/src/main/java/org/unicode/draft/GenerateUnihanCollators.java b/unicodetools/src/main/java/org/unicode/draft/GenerateUnihanCollators.java
index 5e75b4165..739fb484c 100644
--- a/unicodetools/src/main/java/org/unicode/draft/GenerateUnihanCollators.java
+++ b/unicodetools/src/main/java/org/unicode/draft/GenerateUnihanCollators.java
@@ -600,12 +600,12 @@ private static void showOldData(Collator collator, String name, boolean japanese
             int strokes = CldrUtility.ifNull(bestStrokesS.get(item), 0);
             buffer.append(pad(String.valueOf(strokes), 3)).append(";\t");
 
-            int data = getRSShortData(item.codePointAt(0));
+            long order = getRSOrder(item.codePointAt(0));
             String radical = null;
             String remainingStrokes = null;
-            if (data != 0) {
-                radical = radicalStroke.getRadicalStringFromShortData(data);
-                remainingStrokes = RadicalStroke.getResidualStrokesFromShortData(data) + "";
+            if (order != 0) {
+                radical = radicalStroke.getRadicalString(order);
+                remainingStrokes = RadicalStroke.getResidualStrokes(order) + "";
             }
             buffer.append(pad(radical, 4)).append(";\t");
             buffer.append(pad(remainingStrokes, 2)).append(";\t");
@@ -1017,14 +1017,14 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
             for (final String s : bestStrokesIn) {
                 final int c = s.codePointAt(0);
                 final Integer bestStrokeInfo = bestStrokesIn.get(c);
-                int data = getRSShortData(c);
-                if (data == 0) {
+                long order = getRSOrder(c);
+                if (order == 0) {
                     continue;
                 }
-                int radical = RadicalStroke.getRadicalNumberFromShortData(data);
+                int radical = RadicalStroke.getRadicalNumber(order);
                 final int radicalsStrokes =
-                        bestStrokeInfo - RadicalStroke.getResidualStrokesFromShortData(data);
-                if (!RadicalStroke.isSimplifiedFromShortData(data)) {
+                        bestStrokeInfo - RadicalStroke.getResidualStrokes(order);
+                if (!RadicalStroke.isSimplified(order)) {
                     mainStrokesTotal.add(radical, radicalsStrokes);
                     mainCount.add(radical, 1);
                 } else {
@@ -1057,11 +1057,11 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
             for (final String s :
                     new UnicodeSet(kRSUnicode.keySet()).removeAll(bestStrokesIn.keySet())) {
                 int c = s.codePointAt(0);
-                int data = getRSShortData(c);
-                int radical = RadicalStroke.getRadicalNumberFromShortData(data);
+                long order = getRSOrder(c);
+                int radical = RadicalStroke.getRadicalNumber(order);
                 final int computedStrokes =
-                        RadicalStroke.getResidualStrokesFromShortData(data)
-                                + (RadicalStroke.isSimplifiedFromShortData(data)
+                        RadicalStroke.getResidualStrokes(order)
+                                + (RadicalStroke.isSimplified(order)
                                         ? alternateStrokes[radical]
                                         : mainStrokes[radical]);
                 bestStrokesIn.put(s, computedStrokes);
@@ -1079,48 +1079,33 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
         }
     }
 
-    private static int getRSShortData(int c) {
-        int data = radicalStroke.getShortDataForCodePoint(c);
-        if (data != 0) {
-            return data;
-        }
-        if (c < 0x3000) {
-            String radical = radicalMap.get(c);
-            if (radical == null) {
-                return 0;
-            }
-            c = radical.codePointAt(0);
-            assert radical.length() == Character.charCount(c); // single code point
-            data = radicalStroke.getShortDataForCodePoint(c);
-            assert data != 0;
-            return data;
-        }
-        String decomp = nfd.normalize(c);
-        c = decomp.codePointAt(0);
-        data = radicalStroke.getShortDataForCodePoint(c);
-        return data;
-    }
-
-    private static long getRSLongOrder(int c) {
-        long order = radicalStroke.getLongOrder(c);
+    private static long getRSOrder(int c) {
+        long order = radicalStroke.getOrderForCodePoint(c);
         if (order != 0) {
             return order;
         }
         if (c < 0x3000) {
             String radical = radicalMap.get(c);
             if (radical == null) {
-                // Not an ideograph, sort higher than any of them.
-                return ((long) Integer.MAX_VALUE << 32) | c;
+                return 0;
             }
             c = radical.codePointAt(0);
             assert radical.length() == Character.charCount(c); // single code point
-            order = radicalStroke.getLongOrder(c);
+            order = radicalStroke.getOrderForCodePoint(c);
             assert order != 0;
             return order;
         }
         String decomp = nfd.normalize(c);
         c = decomp.codePointAt(0);
-        order = radicalStroke.getLongOrder(c);
+        return radicalStroke.getOrderForCodePoint(c);
+    }
+
+    /**
+     * Same as getRSOrder() but if c does not have radical-stroke data, then this function returns a
+     * value higher than that for any ideograph.
+     */
+    private static long getRSOrderOrHigh(int c) {
+        long order = getRSOrder(c);
         if (order == 0) {
             // Not an ideograph, sort higher than any of them.
             order = ((long) Integer.MAX_VALUE << 32) | c;
@@ -1850,8 +1835,8 @@ public int compare(String s1, String s2) {
                     assert Character.charCount(c1) == s1.length();
                     int c2 = s2.codePointAt(0);
                     assert Character.charCount(c2) == s2.length();
-                    long order1 = getRSLongOrder(c1);
-                    long order2 = getRSLongOrder(c2);
+                    long order1 = getRSOrderOrHigh(c1);
+                    long order2 = getRSOrderOrHigh(c2);
                     if (order1 != order2) {
                         return order1 < order2 ? -1 : 1;
                     }
@@ -1940,13 +1925,13 @@ private static String getIndexValue(InfoType infoType, String s, Output<String>
                 break;
             case radicalStroke:
                 final int codepoint = s.codePointAt(0);
-                int data = getRSShortData(codepoint);
-                if (data == 0) {
+                long order = getRSOrder(codepoint);
+                if (order == 0) {
                     throw new IllegalArgumentException(
                             "Missing R-S data for U+" + Utility.hex(codepoint));
                 }
-                rest = radicalStroke.getRadicalCharFromShortData(data);
-                comment.value = radicalStroke.getRadicalStringFromShortData(data);
+                rest = radicalStroke.getRadicalChar(order);
+                comment.value = radicalStroke.getRadicalString(order);
                 break;
             case stroke:
                 final Integer strokeCount = getStrokeValue(s, bestStrokesT);
diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/RadicalStroke.java b/unicodetools/src/main/java/org/unicode/text/UCA/RadicalStroke.java
index 71f6d9f0a..ec951c007 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCA/RadicalStroke.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCA/RadicalStroke.java
@@ -16,6 +16,7 @@
 
 public final class RadicalStroke {
     private static final int MAX_RADICAL_NUMBER = 214;
+    private static final int MAX_STROKES = 255;
 
     /**
      * The Unicode 1.1 Unihan block was U+4E00..U+9FA5. The ideographs there were allocated in
@@ -25,6 +26,14 @@ public final class RadicalStroke {
 
     private static final boolean DEBUG = false;
 
+    // Bit field shift values (low-order bit numbers) in the combined "order".
+    private static final int RADICAL_SHIFT = 36;
+    private static final int STROKE_SHIFT = 28;
+    private static final int SIMPLIFIED_SHIFT = 24;
+    private static final int EXTENSION_SHIFT = 20;
+    private static final int MAX_EXTENSION = 0xf;
+    private static final int CODE_POINT_MASK = 0xfffff;
+
     private static final int SIMPLIFIED_NUM_BITS = 2;
 
     private String unicodeVersion;
@@ -75,24 +84,12 @@ public RadicalStroke(String unicodeVersion) {
         while (hanIter.next()) {
             int c = hanIter.codepoint;
             assert c >= 0;
-            // Create an "order" collation key similar to the one in
-            // https://www.unicode.org/reports/tr38/#SortingAlgorithm
-            // Sorting Algorithm Used by the Radical-Stroke Charts.
-            //
-            // extension (1 bit):
-            // Sort the original Unihan block first, then all extension blocks.
-            // We also include the code point in the "order".
-            // No need to determine the extension block number since they sort in code point order.
-            // We don't use special values for compatibility ideographs.
-            int extension =
-                    (0x4E00 <= c && c <= 0xFFFF)
-                            ? 0
-                            : 1; // see UCA implicit weights BASE FB40 vs. FB80
+            int extension = getExtension(c);
             String rs = rsUnicode.get(c);
             if (rs == null) {
                 // Sort characters with missing radical-stroke data last.
                 // Maximum radical numbers, simplified, and residual strokes.
-                rs = "214''.63";
+                rs = "214'''.254";
             }
             // Use only the first radical-stroke value if there are multiple.
             int delim = rs.indexOf(' '); // value separator in Unihan data files
@@ -104,24 +101,7 @@ public RadicalStroke(String unicodeVersion) {
             }
             int dot = rs.indexOf('.');
             assert 0 <= dot && dot < delim;
-            // simplified (2 bits):
-            // - 0 = traditional form for the radical (for example, 齒)
-            // - 1 = Chinese simplified form of the radical (for example, 齿)
-            // - 2 = non-Chinese simplified form of the radical (for example, 歯)
-            //       [new in Unicode 15.1]
-            int simplified = 0;
-            int radicalNumberLimit = dot;
-            if (rs.charAt(radicalNumberLimit - 1) == '\'') {
-                simplified = 1;
-                --radicalNumberLimit;
-                if (rs.charAt(radicalNumberLimit - 1) == '\'') {
-                    simplified = 2;
-                    --radicalNumberLimit;
-                }
-            }
-            int radicalNumber = parseInt(rs, 0, radicalNumberLimit);
-            int radicalNumberAndSimplified =
-                    makeRadicalNumberAndSimplified(radicalNumber, simplified);
+            int radicalNumberAndSimplified = parseRadicalNumberAndSimplified(rs, 0, dot);
             int residualStrokeCount = parseInt(rs, dot + 1, delim);
             long order = makeOrder(radicalNumberAndSimplified, residualStrokeCount, extension, c);
             if (DEBUG) {
@@ -197,10 +177,54 @@ public RadicalStroke(String unicodeVersion) {
         // or there is a bug in the code.
         // Turn on the DEBUG flag and see if we can manually remove some characters from the set
         // so that a sequence of following ones does not get removed.
-        assert numOutOfOrder <= 320;
+        // TODO: Before changing the sort order to conform to UAX #38, demoting the simplified-ness
+        // of radicals to below the number of residual strokes,
+        // this successfully asserted numOutOfOrder <= 320.
+        // Find out if this is a known issue.
+        assert numOutOfOrder <= 1500;
         hanNotInCPOrder = new UnicodeSet(hanSet).removeAll(hanInCPOrder).freeze();
     }
 
+    // Triples of (start, end, extension) for coalesced UAX #38 order blocks.
+    // Read in order, so ranges can overlap.
+    private static final int[] EXTENSION_TRIPLES = {
+        // The original Unihan block sorts before extension A.
+        // CJK Unified Ideographs block
+        0x4E00, 0x9FFF, 0,
+        // Compatibility ideographs sort last.
+        // CJK Compatibility Ideographs
+        0xF900, 0xFAFF, 0xf,
+        // CJK Compatibility Ideographs Supplement
+        0x2F800, 0x2FA1F, 0xf,
+        // Extension I pokes a hole in the following range and sorts between H & J.
+        // CJK Unified Ideographs Extension I
+        0x2EBF0, 0x2EE5F, 2,
+        // Extensions A..H sort after the original Unihan block.
+        // CJK Unified Ideographs Extension A, B, C, D, E, F, G, H
+        0x3400, 0x323AF, 1,
+        // J+ after I.
+        // CJK Unified Ideographs Extension J+
+        // TODO: This needs adjustments when another extension block is encoded out of letter order.
+        // https://www.unicode.org/roadmaps/tip/
+        0x323B0, 0x37FFF, 3,
+    };
+
+    /**
+     * Returns an extension value so that extension|code point sorts in the same order as the
+     * UAX #38 block|code point, see https://www.unicode.org/reports/tr38/#SortingAlgorithm
+     */
+    private static final int getExtension(int c) {
+        // The first matching triple wins; adjacent blocks in code point order are coalesced.
+        for (int i = 0; i < EXTENSION_TRIPLES.length; i += 3) {
+            int start = EXTENSION_TRIPLES[i];
+            int end = EXTENSION_TRIPLES[i + 1];
+            if (start <= c && c <= end) {
+                return EXTENSION_TRIPLES[i + 2];
+            }
+        }
+        throw new IllegalArgumentException("cannot find extension value for U+" + Utility.hex(c));
+    }
+
     private static final int makeRadicalNumberAndSimplified(int radical, int simplified) {
         assert 1 <= radical && radical <= MAX_RADICAL_NUMBER;
         assert 0 <= simplified && simplified <= 3;
@@ -209,20 +233,20 @@ private static final int makeRadicalNumberAndSimplified(int radical, int simplif
 
     private static final long makeOrder(
             int radicalNumberAndSimplified, int residualStrokeCount, int extension, int c) {
-        assert residualStrokeCount <= 255;
-        assert extension == 0 || extension == 1;
-        return ((long) radicalNumberAndSimplified << 34)
-                | (residualStrokeCount << 24)
-                | (extension << 23)
+        assert residualStrokeCount <= MAX_STROKES;
+        assert 0 <= extension && extension <= MAX_EXTENSION;
+        assert 0 <= c && c <= CODE_POINT_MASK;
+        int radical = radicalNumberAndSimplified >> SIMPLIFIED_NUM_BITS;
+        int simplified = radicalNumberAndSimplified & 3;
+        return ((long) radical << RADICAL_SHIFT)
+                | ((long) residualStrokeCount << STROKE_SHIFT)
+                | (simplified << SIMPLIFIED_SHIFT)
+                | (extension << EXTENSION_SHIFT)
                 | c;
     }
 
-    private static final int getRadicalNumberAndSimplified(long order) {
-        return (int) (order >> 34);
-    }
-
     private static final int getCodePoint(long order) {
-        return (int) (order & 0x1fffff);
+        return (int) (order & CODE_POINT_MASK);
     }
 
     public void printRadicalStrokeOrder(Writer writer) throws IOException {
@@ -243,12 +267,18 @@ private int printNextRadical(int pos, Writer writer) throws IOException {
         }
         StringBuilder sb = new StringBuilder("[radical ");
         long order = orderedHan[pos];
-        int radicalNumberAndSimplified = getRadicalNumberAndSimplified(order);
-        String radicalChars = radToChars[radicalNumberAndSimplified];
-        sb.append(getRadicalStringFromShortData(getShortData(order)))
-                .append('=')
-                .append(radicalChars)
-                .append(':');
+        sb.append(getRadicalString(order)).append('=');
+        int radicalNumber = getRadicalNumber(order);
+        // Append the radicals and ideographs for each of the traditional and simplified forms.
+        for (int i = radicalNumber << SIMPLIFIED_NUM_BITS, limit = i + (1 << SIMPLIFIED_NUM_BITS);
+                i < limit;
+                ++i) {
+            String radicalChars = radToChars[i];
+            if (radicalChars != null) {
+                sb.append(radicalChars);
+            }
+        }
+        sb.append(':');
         int start = 0;
         int prev = 0;
         do {
@@ -271,7 +301,7 @@ private int printNextRadical(int pos, Writer writer) throws IOException {
                 break;
             }
             order = orderedHan[pos];
-        } while (getRadicalNumberAndSimplified(order) == radicalNumberAndSimplified);
+        } while (getRadicalNumber(order) == radicalNumber);
         if (start < prev) {
             // Finish the last range.
             if ((start + 2) <= prev) { // at least 3 code points
@@ -295,19 +325,18 @@ public void printUnihanIndex(Writer writer) throws IOException {
         for (int pos = 0; ; ) {
             long order = orderedHan[pos];
             int c = getCodePoint(order); // First code point for the radical.
-            int radicalNumberAndSimplified = getRadicalNumberAndSimplified(order);
+            int radicalNumber = getRadicalNumber(order);
             // For the representative radical character,
             // use the unified ideograph which is almost always in the original Unihan block
             // which has good font support,
             // rather than the character in the radicals block (if there is such a character).
-            int ideograph =
-                    getUnifiedIdeographForRadicalNumberAndSimplified(radicalNumberAndSimplified);
+            int ideograph = getUnifiedIdeograph(order);
             sb.replace(0, sb.length(), "&")
                     .appendCodePoint(c)
                     .append("=\\uFDD0")
                     .appendCodePoint(ideograph)
                     .append(" # radical ")
-                    .append(getRadicalStringFromShortData(getShortData(order)))
+                    .append(getRadicalString(order))
                     .append('\n');
             writer.append(sb);
             do {
@@ -315,7 +344,7 @@ public void printUnihanIndex(Writer writer) throws IOException {
                     return;
                 }
                 order = orderedHan[pos];
-            } while (getRadicalNumberAndSimplified(order) == radicalNumberAndSimplified);
+            } while (getRadicalNumber(order) == radicalNumber);
         }
     }
 
@@ -333,92 +362,80 @@ public UnicodeSet getHanNotInCPOrder() {
      * Returns a long for the UCA order of ideographs, including the code point tie-breaker. Returns
      * 0 for non-ideographs.
      */
-    public long getLongOrder(int cp) {
-        return getDataForCodePoint(cp);
-    }
-
-    /**
-     * Returns data in bit sets: 19..12=radicalNumber, 11..10=simplified, 7..0=residualStrokes.
-     * Returns 0 for non-ideographs.
-     */
-    public int getShortDataForCodePoint(int cp) {
-        return getShortData(getDataForCodePoint(cp));
-    }
-
-    // TODO: Why not always work with long order?
-    // If we always get the "short data" from the long order, then these just seem like
-    // unnecessarily duplicate APIs.
-    private static int getShortData(long order) {
-        return (int) (order >> 24);
-    }
-
-    private static int getRadicalNumberAndSimplifiedFromShortData(int data) {
-        assert data != 0;
-        int radicalNumberAndSimplified = data >> 10;
-        assert radicalNumberAndSimplified >= makeRadicalNumberAndSimplified(1, 0); // radical >= 1
-        return radicalNumberAndSimplified;
-    }
-
-    public static int getRadicalNumberFromShortData(int data) {
-        assert data != 0;
-        return data >> 12;
+    public long getOrderForCodePoint(int cp) {
+        // There is no Arrays.binarySearch(long[], ...) that takes a Comparator.
+        int start = 0;
+        int limit = rawHan.length;
+        while (start < limit) {
+            int i = (start + limit) >>> 1; // unsigned shift: the midpoint cannot overflow
+            int midCP = getCodePoint(rawHan[i]);
+            if (cp < midCP) {
+                limit = i;
+            } else if (cp > midCP) {
+                start = i + 1;
+            } else /* == */ {
+                return rawHan[i];
+            }
+        }
+        return 0; // not found
     }
 
-    public static int getSimplifiedFromShortData(int data) {
-        assert data != 0;
-        return (data >> 10) & 3;
+    public static int getRadicalNumber(long order) {
+        assert order != 0;
+        return (int) (order >> RADICAL_SHIFT);
     }
 
-    public static boolean isSimplifiedFromShortData(int data) {
-        assert data != 0;
-        return (data & 0xc00) != 0;
+    public static boolean isSimplified(long order) {
+        assert order != 0;
+        return ((order >> SIMPLIFIED_SHIFT) & 3) != 0;
     }
 
-    public static int getResidualStrokesFromShortData(int data) {
-        assert data != 0;
-        return data & 0xff;
+    public static int getResidualStrokes(long order) {
+        assert order != 0;
+        return (int) (order >> STROKE_SHIFT) & 0xff;
     }
 
-    /** Returns the radical character for its number and simplified-ness. */
-    public String getRadicalCharFromShortData(int data) {
-        int radicalNumberAndSimplified = getRadicalNumberAndSimplifiedFromShortData(data);
+    /**
+     * Returns the radical character (traditional form) for its number. Returns the unified
+     * ideograph if there is no radical character for this number.
+     */
+    public String getRadicalChar(long order) {
+        // radToChar/radToChars are indexed by radicalNumberAndSimplified; use simplified=0.
+        int radicalNumberAndSimplified = getRadicalNumber(order) << SIMPLIFIED_NUM_BITS;
         if (radicalNumberAndSimplified >= radToChar.length) {
             return null;
         }
         String s = radToChar[radicalNumberAndSimplified];
         if (s != null) {
             return s;
         }
-        // For some radicals there is no character in the radicals block. Return the unified
-        // ideograph.
+        // For some radicals there is no character in the radicals block.
+        // Return the unified ideograph, which is then the only character
+        // in this radToChars string.
         return radToChars[radicalNumberAndSimplified];
     }
 
-    /** Returns a string like "90" or "90'". */
-    public String getRadicalStringFromShortData(int data) {
-        int radicalNumberAndSimplified = getRadicalNumberAndSimplifiedFromShortData(data);
+    public String getRadicalString(long order) {
+        int radicalNumberAndSimplified = getRadicalNumber(order) << SIMPLIFIED_NUM_BITS;
         return radicalNumberAndSimplified < radicalStrings.length
                 ? radicalStrings[radicalNumberAndSimplified]
                 : null;
     }
 
-    private int getUnifiedIdeographForRadicalNumberAndSimplified(int radicalNumberAndSimplified) {
-        String radicalChars = radToChars[radicalNumberAndSimplified];
+    private int getUnifiedIdeograph(long order) {
+        int radicalNumber = getRadicalNumber(order);
+        String radicalChars = radToChars[radicalNumber << SIMPLIFIED_NUM_BITS];
         if (radicalChars == null) {
-            int radical = radicalNumberAndSimplified >> SIMPLIFIED_NUM_BITS;
-            int simplified = radicalNumberAndSimplified & ((1 << SIMPLIFIED_NUM_BITS) - 1);
-            if (radical == MAX_RADICAL_NUMBER && simplified == 2) {
+            if (radicalNumber == MAX_RADICAL_NUMBER && getResidualStrokes(order) == 254) {
                 // Special entry for missing radical-stroke data.
                 return '?';
             }
-            throw new IllegalArgumentException(
-                    "no radToChars for " + radical + "'''".substring(0, simplified));
+            throw new IllegalArgumentException("no radToChars for " + radicalNumber);
         }
         int length = radicalChars.length();
         // All radical characters should be BMP characters.
         // Unicode 15.1 exception: 182'' --> U+322C4
-        assert length == Character.codePointCount(radicalChars, 0, length)
-                || radicalNumberAndSimplified == makeRadicalNumberAndSimplified(182, 2);
+        assert length == Character.codePointCount(radicalChars, 0, length);
         // Also in Unicode 15.1, there are two radicals for which there are no characters in the
         // radicals blocks.
         // In these cases, radicalChars contains only one code point, the unified ideograph.
@@ -426,24 +443,6 @@ private int getUnifiedIdeographForRadicalNumberAndSimplified(int radicalNumberAn
         return radicalChars.codePointBefore(length);
     }
 
-    private long getDataForCodePoint(int cp) {
-        // There is no Arrays.binarySearch(long[], ...) that takes a Comparator.
-        int start = 0;
-        int limit = rawHan.length;
-        while (start < limit) {
-            int i = (start + limit) / 2;
-            int midCP = getCodePoint(rawHan[i]);
-            if (cp < midCP) {
-                limit = i;
-            } else if (cp > midCP) {
-                start = i + 1;
-            } else /* == */ {
-                return rawHan[i];
-            }
-        }
-        return 0; // not found
-    }
-
     // TODO: Consider moving this into a new class/file CJKRadicals which could also be called from
     // other code
     // that currently uses iup.load(UcdProperty.CJK_Radical) even though that cannot represent all
@@ -479,35 +478,13 @@ private void getCJKRadicals(IndexUnicodeProperties iup) {
         for (UcdLineParser.UcdLine line : parser) {
             String[] parts = line.getParts();
             String radicalString = parts[0];
-            int simplified = 0;
-            int radicalNumberLimit = radicalString.length();
-            if (radicalString.charAt(radicalNumberLimit - 1) == '\'') {
-                simplified = 1;
-                --radicalNumberLimit;
-                if (radicalString.charAt(radicalNumberLimit - 1) == '\'') {
-                    // Unicode 15.1 UAX #38:
-                    // Two apostrophes (") after the radical indicates a
-                    // non-Chinese simplified version of the given radical.
-                    simplified = 2;
-                    --radicalNumberLimit;
-                    if (radicalString.charAt(radicalNumberLimit - 1) == '\'') {
-                        // Unicode 16 UAX #38:
-                        // Three apostrophes after the radical indicates a
-                        // second non-Chinese simplified version of the given radical.
-                        simplified = 3;
-                        --radicalNumberLimit;
-                    }
-                }
-            }
-            int radicalNumber = parseInt(radicalString, 0, radicalNumberLimit);
             int radicalNumberAndSimplified =
-                    makeRadicalNumberAndSimplified(radicalNumber, simplified);
+                    parseRadicalNumberAndSimplified(radicalString, 0, radicalString.length());
             radicalStrings[radicalNumberAndSimplified] = radicalString;
 
-            int radicalChar = -1;
             String radicalCharString = "";
             if (!parts[1].isEmpty()) {
-                radicalChar = Integer.parseInt(parts[1], 16);
+                int radicalChar = Integer.parseInt(parts[1], 16);
                 assert 0 < radicalChar;
                 assert radicalChar < 0x3000; // should be a radical code point
                 radToChar[radicalNumberAndSimplified] =
@@ -523,6 +500,33 @@ private void getCJKRadicals(IndexUnicodeProperties iup) {
         }
     }
 
+    /**
+     * Parses a radical number followed by zero to three apostrophes from s[start, limit).
+     *
+     * <p>The number of apostrophes is the 2-bit "simplified" value, see
+     * https://www.unicode.org/reports/tr38/#SortingAlgorithm (quoted from Unicode 16.0):
+     *
+     * <ul>
+     *   <li>0 = traditional form for the radical (for example, U+9F8D 龍)
+     *   <li>1 = Chinese simplified form of the radical (for example, U+9F99 龙)
+     *   <li>2 = non-Chinese simplified form of the radical (for example, U+7ADC 竜)
+     *       [new in Unicode 15.1]
+     *   <li>3 = second non-Chinese simplified form of the radical (for example, U+31DE5 𱷥)
+     *       [new in Unicode 16.0]
+     * </ul>
+     *
+     * @return the value of makeRadicalNumberAndSimplified(radicalNumber, simplified)
+     */
+    private static int parseRadicalNumberAndSimplified(String s, int start, int limit) {
+        int apostrophes = 0;
+        // Strip at most three trailing apostrophes, counting them.
+        while (apostrophes < 3 && s.charAt(limit - 1) == '\'') {
+            ++apostrophes;
+            --limit;
+        }
+        return makeRadicalNumberAndSimplified(parseInt(s, start, limit), apostrophes);
+    }
+
     /**
      * Parses a small (max 3 digits) integer from a subsequence. Avoids creation of a subsequence
      * object.