From ed4ecf0b2f81eb8f39d16b8fc6a0b30488d6316a Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 2 Oct 2023 16:50:37 +0200
Subject: [PATCH 1/7] First attempt at generating InPC

---
 .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index 702c46ca5..645a490e2 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -908,6 +908,126 @@ Format: kenFile skipValue=Rotated
 #
 Property: VerticalOrientation
 
+File:	IndicPositionalCategory
+# IndicPositionalCategory-15.1.0.txt
+# Date: 2023-01-05
+# © 2023 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# For documentation, see UAX #44: Unicode Character Database,
+# at https://www.unicode.org/reports/tr44/
+#
+# This file defines the following property:
+#
+#    Indic_Positional_Category     enumerated property
+#
+# Scope: This property is aimed at the problem of
+# the specification of syllabic structure for Indic scripts.
+# Because dependent vowels (matras), visible viramas, and other
+# characters are placed in notional slots around the consonant (or
+# consonant cluster) core of an Indic syllable, there may be
+# cooccurrence constraints or other interactions. Also, it may be
+# desirable, in cases where more than one such character may occur in
+# sequence, as for example, in a top slot and a bottom slot, to
+# specify preferred orders for spelling. As such, this property
+# is designed primarily to supplement the Indic_Syllabic_Category
+# property.
+#
+# In addition to combining marks associated with Indic scripts, the
+# Indic_Positional_Category has non-trivial values for special signs
+# associated with Indic_Syllabic_Category=Consonant_Prefixed
+# or Indic_Syllabic_Category=Consonant_Preceding_Repha. Those signs
+# have General_Category=Lo, rather than being combining marks.
+# They occur in initial position in syllabic structure. However, when
+# rendered, they appear as marks positioned with respect to another
+# base letter (usually above it). Hence, having an explicit value for
+# Indic_Positional_Category for those signs can be helpful.
+#
+# Note that this property is *not* intended as
+# a prescriptive property regarding display or font design,
+# for a number of reasons. Good font design requires information
+# that is outside the context of a character encoding standard,
+# and is best handled in other venues. For Indic dependent
+# vowels and similar characters, in particular:
+#
+#   1. Matra placement may vary somewhat based on typeface design.
+#   2. Matra placement, even within a single script, may vary
+#      somewhat according to historic period or local conventions.
+#   3. Matra placement may be changed by explicit orthographic reform
+#      decisions.
+#   4. Matras may ligate in various ways with a consonant (or even
+#      other elements of a syllable) instead of occurring in a
+#      discrete location.
+#   5. Matra display may be contextually determined. This is
+#      notable, for example, in the Tamil script, where the shape
+#      and placement of -u and -uu vowels depends strongly on
+#      which consonant they adjoin.
+#
+# Format:
+#    Field 0  Unicode code point value or range of code point values
+#    Field 1  Indic_Positional_Category property value
+#
+# Field 1 is followed by a comment field, starting with the number sign '#',
+# which shows the General_Category property value, the Unicode character name
+# or names, and, in lines with ranges of code points, the code point count in
+# square brackets.
+#
+# The scripts assessed as containing dependent vowels or similar characters
+# in the structural sense used for the Indic_Positional_Category are the
+# following:
+#
+# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid,
+# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati,
+# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi,
+# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu,
+# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi,
+# Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra,
+# Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri,
+# Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai,
+# Tibetan, Tirhuta, and Zanabazar Square.
+#
+# All characters for all other scripts not in that list
+# take the default value for this property.
+#
+# See IndicSyllabicCategory.txt for a slightly more extended
+# list of Indic scripts, including those which do not have
+# positional characters. Currently, those additional
+# Indic scripts without positional characters are
+# Multani, Phags-pa, and Tai Le.
+#
+# Notes:
+#
+# 1. The following characters are all assigned the positional category Right,
+# but may have different positions in some cases:
+#   * U+0BC1 TAMIL VOWEL SIGN U and U+0BC2 TAMIL VOWEL SIGN UU have
+#     contextually variable placement in Tamil.
+#   * U+0D41 MALAYALAM VOWEL SIGN U and U+0D42 MALAYALAM VOWEL SIGN UU form
+#     complex ligatures with consonants in older Malayalam orthography.
+#   * U+11341 GRANTHA VOWEL SIGN U and U+11342 GRANTHA VOWEL SIGN UU have
+#     contextually variable placement in Grantha.
+#   * U+11440 NEWA VOWEL SIGN O and U+11441 NEWA VOWEL SIGN AU have contextually
+#     variable placement in Newa.
+#
+# 2. The following characters are all assigned the positional category Top,
+# but may have different positions in some cases:
+#   * U+1143E NEWA VOWEL SIGN E and U+1143F NEWA VOWEL SIGN AI have contextually
+#     variable placement in Newa.
+#
+# 3. The following characters are all assigned the positional category Bottom,
+# but may have different positions in some cases:
+#   * U+102F MYANMAR VOWEL SIGN U and U+1030 MYANMAR VOWEL SIGN UU have
+#     contextually variable placement in Myanmar.
+#   * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have
+#     contextually variable placement in Tai Tham.
+#
+# 4. The following character is assigned the positional category Left, but
+# may have different positions in different styles:
+#   * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement
+#     in Nandinagari.
+Property: Indic_Positional_Category
+Format:	skipValue=NA
+
 File:	UnicodeData
 Property: SPECIAL
 

From 8349834417948514bfca81d3c5a27c846ed40d54 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 2 Oct 2023 17:32:58 +0200
Subject: [PATCH 2/7] Some progress towards roozbehFile

---
 .../unicode/text/UCD/MakeUnicodeFiles.java    | 18 ++++++++++
 .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 35 +++++++++++++------
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index e1ff508ad..afb906a02 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -67,6 +67,8 @@ static class Format {
         Map<String, List<String>> fileToPropertySet = new TreeMap<String, List<String>>();
         Map<String, String> fileToComments = new TreeMap<String, String>();
         Map<String, String> fileToDirectory = new TreeMap<String, String>();
+        Map<String, List<String>> propertyToOrderedValues =
+                new TreeMap<String, List<String>>();
         Map<String, Map<String, String>> propertyToValueToComments =
                 new TreeMap<String, Map<String, String>>();
         Map<String, String> hackMap = new HashMap<String, String>();
@@ -110,6 +112,10 @@ public static class PrintStyle {
             // Unicode 15.1 and later LineBreak.txt and EastAsianWidth.txt, which are all generated
             // in that format by some other tool.
             boolean kenFile = false;
+            // Whether the file should be produced in the style of IndicPositionalCategory.txt and
+            // IndicSyllabicCategory.txt, which are both generated in that format by some other
+            // tool.
+            boolean roozbehFile = false;
             boolean hackValues = false;
             boolean mergeRanges = true;
             String nameStyle = "none";
@@ -138,6 +144,8 @@ String parse(String options) {
                         interleaveValues = true;
                     } else if (piece.equals("kenFile")) {
                         kenFile = true;
+                    } else if (piece.equals("roozbehFile")) {
+                        roozbehFile = true;
                     } else if (piece.equals("hackValues")) {
                         hackValues = true;
                     } else if (piece.equals("sortNumeric")) {
@@ -350,6 +358,12 @@ private void build() {
                             value = "";
                         } else if (line.startsWith("Value:")) {
                             value = lineValue;
+                            var values = propertyToOrderedValues.get(property);
+                            if (values == null) {
+                                values = new ArrayList<String>();
+                                propertyToOrderedValues.put(property, values);
+                            }
+                            values.add(value);
                         } else if (line.startsWith("HackName:")) {
                             final String regularItem = Utility.getUnskeleton(lineValue, true);
                             hackMap.put(regularItem, lineValue);
@@ -1254,6 +1268,10 @@ private static void writeEnumeratedValues(
             temp2.addAll(aliases);
             aliases = temp2;
         }
+        if (ps.roozbehFile) {
+            System.out.println(Format.theFormat.propertyToOrderedValues);
+            aliases = Format.theFormat.propertyToOrderedValues.get(prop.getName());
+        }
         if (ps.sortNumeric) {
             if (DEBUG) {
                 System.out.println("Reordering");
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index 645a490e2..6902d6b2c 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -909,15 +909,6 @@ Format: kenFile skipValue=Rotated
 Property: VerticalOrientation
 
 File:	IndicPositionalCategory
-# IndicPositionalCategory-15.1.0.txt
-# Date: 2023-01-05
-# © 2023 Unicode®, Inc.
-# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
-#
-# For documentation, see UAX #44: Unicode Character Database,
-# at https://www.unicode.org/reports/tr44/
-#
 # This file defines the following property:
 #
 #    Indic_Positional_Category     enumerated property
@@ -1025,8 +1016,32 @@ File:	IndicPositionalCategory
 # may have different positions in different styles:
 #   * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement
 #     in Nandinagari.
+
 Property: Indic_Positional_Category
-Format:	skipValue=NA
+Format:	roozbehFile skipValue=NA
+Value: Right
+Value: Left
+Value: Visual_Order_Left
+
+# These are dependent vowels that occur to the left of the consonant
+# letter in a syllable, but which occur in scripts using the visual order
+# model, instead of the logical order model. Because of the different
+# model, these left-side vowels occur first in the backing store (before
+# the consonant letter) and are not reordered during text rendering.
+#
+# [Derivation: Logical_Order_Exception=Yes]
+Value: Left_And_Right
+Value: Top
+Value: Bottom
+Value: Top_And_Bottom
+Value: Top_And_Right
+Value: Top_And_Left
+Value: Top_And_Left_And_Right
+Value: Bottom_And_Right
+Value: Bottom_And_Left
+Value: Top_And_Bottom_And_Right
+Value: Top_And_Bottom_And_Left
+Value: Overstruck
 
 File:	UnicodeData
 Property: SPECIAL

From 52bbc737086f3afab7d661d6c354b713e00aed51 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 2 Oct 2023 17:39:18 +0200
Subject: [PATCH 3/7] Use a dashed separator line for roozbehFile output

---
 .../main/java/org/unicode/text/UCD/MakeUnicodeFiles.java    | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index afb906a02..981849162 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -1434,7 +1434,11 @@ private static void writeEnumeratedValues(
 
             if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
                 pw.println();
-                pw.println(SEPARATOR);
+                if (ps.roozbehFile) {
+                    pw.println(SEPARATOR.replace('=', '-'));
+                } else {
+                    pw.println(SEPARATOR);
+                }
                 if (nonLongValue) {
                     pw.println();
                     pw.println("# " + prop.getName() + "=" + value);

From cd39981f7766d285380e211f72f7800cb37298c4 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 2 Oct 2023 18:10:09 +0200
Subject: [PATCH 4/7] more roozbehifying

---
 .../org/unicode/text/UCD/MakeUnicodeFiles.java | 18 +++++++++++++-----
 .../org/unicode/text/UCD/MakeUnicodeFiles.txt  |  3 ++-
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index 981849162..3b9c8b0b8 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -714,7 +714,11 @@ private static void writeUnihan(String directory) throws IOException {
             pw.println();
             pw.println(SEPARATOR);
             pw.println();
-            pw.println("# Property:\t" + propName);
+            if (ps.roozbehFile) {
+                pw.println("# Property: " + propName);
+            } else {
+                pw.println("# Property:\t" + propName);
+            }
 
             final UnicodeMap<String> map = props.get(propName);
 
@@ -1320,6 +1324,9 @@ private static void writeEnumeratedValues(
                 writeEnumeratedMissingValues(pw, overallDefault, defaultLbValues);
             }
         }
+        if (ps.roozbehFile) {
+            pw.println(SEPARATOR.replace('=', '-'));
+        }
         for (final Iterator<String> it = aliases.iterator(); it.hasNext(); ) {
             final String value = it.next();
             if (DEBUG) {
@@ -1434,13 +1441,13 @@ private static void writeEnumeratedValues(
 
             if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
                 pw.println();
-                if (ps.roozbehFile) {
-                    pw.println(SEPARATOR.replace('=', '-'));
-                } else {
+                if (!ps.roozbehFile) {
                     pw.println(SEPARATOR);
                 }
                 if (nonLongValue) {
-                    pw.println();
+                    if (!ps.roozbehFile) {
+                        pw.println();
+                    }
                     pw.println("# " + prop.getName() + "=" + value);
                 }
             }
@@ -1464,6 +1471,7 @@ private static void writeEnumeratedValues(
             pw.println();
             // if (s.size() != 0)
             bf.setMergeRanges(ps.mergeRanges);
+            bf.setShowTotal(!ps.roozbehFile);
             bf.showSetNames(pw, s);
             if (DEBUG) {
                 System.out.println(bf.showSetNames(s));
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index 6902d6b2c..3eaec2cdd 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -909,6 +909,7 @@ Format: kenFile skipValue=Rotated
 Property: VerticalOrientation
 
 File:	IndicPositionalCategory
+#
 # This file defines the following property:
 #
 #    Indic_Positional_Category     enumerated property
@@ -1018,7 +1019,7 @@ File:	IndicPositionalCategory
 #     in Nandinagari.
 
 Property: Indic_Positional_Category
-Format:	roozbehFile skipValue=NA
+Format:	roozbehFile valueStyle=short skipValue=NA
 Value: Right
 Value: Left
 Value: Visual_Order_Left

From fb6bf4e50b8174411c2bba9ff3606f0d7bf15869 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 2 Oct 2023 20:06:10 +0200
Subject: [PATCH 5/7] As good as it will get.

---
 .../data/ucd/dev/IndicPositionalCategory.txt  |  8 +++----
 .../unicode/text/UCD/MakeUnicodeFiles.java    | 22 +++++++++++--------
 .../org/unicode/text/UCD/MakeUnicodeFiles.txt |  1 -
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
index a7c5aef60..0381f42cb 100644
--- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
+++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
@@ -1,11 +1,11 @@
-# IndicPositionalCategory-15.1.0.txt
-# Date: 2023-01-05
+# IndicPositionalCategory-16.0.0.txt
+# Date: 2023-10-02, 18:04:25 GMT
 # © 2023 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see https://www.unicode.org/terms_of_use.html
 #
-# For documentation, see UAX #44: Unicode Character Database,
-# at https://www.unicode.org/reports/tr44/
+# Unicode Character Database
+#   For documentation, see https://www.unicode.org/reports/tr44/
 #
 # This file defines the following property:
 #
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index 3b9c8b0b8..6fbe8432c 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -714,11 +714,7 @@ private static void writeUnihan(String directory) throws IOException {
             pw.println();
             pw.println(SEPARATOR);
             pw.println();
-            if (ps.roozbehFile) {
-                pw.println("# Property: " + propName);
-            } else {
-                pw.println("# Property:\t" + propName);
-            }
+            pw.println("# Property:\t" + propName);
 
             final UnicodeMap<String> map = props.get(propName);
 
@@ -1170,6 +1166,9 @@ public static void generatePropertyFile(String filename) throws IOException {
                             filename, Format.theFormat.getPrintStyle(name));
             if (!ps.kenFile) {
                 pwProp.println();
+                if (ps.roozbehFile) {
+                    pwProp.println();
+                }
                 pwProp.println(SEPARATOR);
             }
             final String propComment = Format.theFormat.getValueComments(name, "");
@@ -1179,7 +1178,11 @@ public static void generatePropertyFile(String filename) throws IOException {
                     pwProp.println(propComment);
                 } else if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
                     pwProp.println();
-                    pwProp.println("# Property:\t" + name);
+                    if (ps.roozbehFile) {
+                        pwProp.println("# Property: " + name);
+                    } else {
+                        pwProp.println("# Property:\t" + name);
+                    }
                 }
             }
 
@@ -1200,9 +1203,9 @@ public static void generatePropertyFile(String filename) throws IOException {
                         v = v + " (" + v2 + ")";
                     }
                 }
-                pwProp.println();
+                pwProp.println(ps.roozbehFile ? "#" : "");
                 pwProp.println("#  All code points not explicitly listed for " + prop.getName());
-                pwProp.println("#  have the value " + v + ".");
+                pwProp.println("#  have the value " + v + (ps.roozbehFile && v.equals("NA") ? " (not applicable)." : "."));
             }
 
             if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
@@ -1306,7 +1309,7 @@ private static void writeEnumeratedValues(
 
         final String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue;
         if (missing != null && !missing.equals(UCD_Names.NO)) {
-            pw.println();
+            pw.println(ps.roozbehFile ? "#" : "");
             final String propName = bf.getPropName();
             //      if (propName == null) propName = "";
             //      else if (propName.length() != 0) propName = propName + "; ";
@@ -1325,6 +1328,7 @@ private static void writeEnumeratedValues(
             }
         }
         if (ps.roozbehFile) {
+            pw.println();
             pw.println(SEPARATOR.replace('=', '-'));
         }
         for (final Iterator<String> it = aliases.iterator(); it.hasNext(); ) {
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index 3eaec2cdd..a2fc7229a 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -1017,7 +1017,6 @@ File:	IndicPositionalCategory
 # may have different positions in different styles:
 #   * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement
 #     in Nandinagari.
-
 Property: Indic_Positional_Category
 Format:	roozbehFile valueStyle=short skipValue=NA
 Value: Right

From d6b73d940daa97673c5d739ad3584b6e56c2430e Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 3 Oct 2023 01:03:58 +0200
Subject: [PATCH 6/7] Seems to work well enough

---
 .../data/ucd/dev/IndicPositionalCategory.txt  |   2 +-
 .../data/ucd/dev/IndicSyllabicCategory.txt    |  10 +-
 .../unicode/text/UCD/MakeUnicodeFiles.java    |  21 +-
 .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 237 +++++++++++++++++-
 4 files changed, 258 insertions(+), 12 deletions(-)

diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
index 0381f42cb..9b5aabfa0 100644
--- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
+++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
@@ -1,5 +1,5 @@
 # IndicPositionalCategory-16.0.0.txt
-# Date: 2023-10-02, 18:04:25 GMT
+# Date: 2023-10-02, 22:58:33 GMT
 # © 2023 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
index f2623b471..5de0d7554 100644
--- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
+++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
@@ -1,11 +1,11 @@
-# IndicSyllabicCategory-15.1.0.txt
-# Date: 2023-01-05
+# IndicSyllabicCategory-16.0.0.txt
+# Date: 2023-10-02, 22:58:33 GMT
 # © 2023 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see https://www.unicode.org/terms_of_use.html
 #
-# For documentation, see UAX #44: Unicode Character Database,
-# at https://www.unicode.org/reports/tr44/
+# Unicode Character Database
+#   For documentation, see https://www.unicode.org/reports/tr44/
 #
 # This file defines the following property:
 #
@@ -1335,7 +1335,7 @@ ABF0..ABF9    ; Number # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI
 # script, e.g. in Brahmi)
 #
 # Note: These are different from Numbers, in the way that there is no known
-# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. 
+# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
 # Until such evidence is found, implementations may assume that Brahmi
 # Joining Numbers only participate in shaping with other Brahmi Joining
 # Numbers.
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index 6fbe8432c..d24e283c9 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -116,6 +116,8 @@ public static class PrintStyle {
             // IndicSyllabicCategory.txt, which are both generated in that format by some other
             // tool.
             boolean roozbehFile = false;
+            // Whether to separate values of enumerated properties using a line of equal signs.
+            boolean separateValues = true;
             boolean hackValues = false;
             boolean mergeRanges = true;
             String nameStyle = "none";
@@ -146,6 +148,8 @@ String parse(String options) {
                         kenFile = true;
                     } else if (piece.equals("roozbehFile")) {
                         roozbehFile = true;
+                    } else if (piece.startsWith("separateValues=")) {
+                        separateValues = afterEqualsBoolean(piece);
                     } else if (piece.equals("hackValues")) {
                         hackValues = true;
                     } else if (piece.equals("sortNumeric")) {
@@ -309,6 +313,10 @@ private void build() {
                     }
                     line = line.trim();
                     if (line.length() == 0) {
+                        if (comments.length() != 0) {
+                            // Preserve blank lines between comments.
+                            comments += "\n";
+                        }
                         continue;
                     }
                     if (DEBUG) {
@@ -329,6 +337,7 @@ private void build() {
                         comments += line;
                     } else {
                         // end of comments, roll up
+                        comments = comments.trim();
                         if (comments.length() != 0) {
                             if (property != null) {
                                 addValueComments(property, value, comments);
@@ -1166,7 +1175,7 @@ public static void generatePropertyFile(String filename) throws IOException {
                             filename, Format.theFormat.getPrintStyle(name));
             if (!ps.kenFile) {
                 pwProp.println();
-                if (ps.roozbehFile) {
+                if (!ps.separateValues) {
                     pwProp.println();
                 }
                 pwProp.println(SEPARATOR);
@@ -1276,7 +1285,6 @@ private static void writeEnumeratedValues(
             aliases = temp2;
         }
         if (ps.roozbehFile) {
-            System.out.println(Format.theFormat.propertyToOrderedValues);
             aliases = Format.theFormat.propertyToOrderedValues.get(prop.getName());
         }
         if (ps.sortNumeric) {
@@ -1327,7 +1335,7 @@ private static void writeEnumeratedValues(
                 writeEnumeratedMissingValues(pw, overallDefault, defaultLbValues);
             }
         }
-        if (ps.roozbehFile) {
+        if (!ps.separateValues) {
             pw.println();
             pw.println(SEPARATOR.replace('=', '-'));
         }
@@ -1445,11 +1453,11 @@ private static void writeEnumeratedValues(
 
             if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
                 pw.println();
-                if (!ps.roozbehFile) {
+                if (ps.separateValues) {
                     pw.println(SEPARATOR);
                 }
                 if (nonLongValue) {
-                    if (!ps.roozbehFile) {
+                    if (ps.separateValues) {
                         pw.println();
                     }
                     pw.println("# " + prop.getName() + "=" + value);
@@ -1476,6 +1484,9 @@ private static void writeEnumeratedValues(
             // if (s.size() != 0)
             bf.setMergeRanges(ps.mergeRanges);
             bf.setShowTotal(!ps.roozbehFile);
+            if (ps.roozbehFile) {
+                bf.setRangeBreakSource(ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block"));
+            }
             bf.showSetNames(pw, s);
             if (DEBUG) {
                 System.out.println(bf.showSetNames(s));
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index a2fc7229a..db8ebd7b8 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -1018,7 +1018,7 @@ File:	IndicPositionalCategory
 #   * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement
 #     in Nandinagari.
 Property: Indic_Positional_Category
-Format:	roozbehFile valueStyle=short skipValue=NA
+Format:	roozbehFile separateValues=false valueStyle=short skipValue=NA
 Value: Right
 Value: Left
 Value: Visual_Order_Left
@@ -1043,6 +1043,241 @@ Value: Top_And_Bottom_And_Right
 Value: Top_And_Bottom_And_Left
 Value: Overstruck
 
+File:	IndicSyllabicCategory
+#
+# This file defines the following property:
+#
+#    Indic_Syllabic_Category     enumerated property
+#
+# Scope: This property is aimed at two general problem
+# areas involving the analysis and processing of Indic scripts:
+#
+#   1. Specification of syllabic structure.
+#   2. Specification of segmentation rules.
+#
+# Both of these problem areas may benefit from having defined subtypes
+# of Indic script characters which are relevant to how Indic
+# syllables (or aksaras) are constructed. Note that rules for
+# syllabic structure in Indic scripts may differ significantly
+# from how phonological syllables are defined.
+#
+# Format:
+#    Field 0  Unicode code point value or range of code point values
+#    Field 1  Indic_Syllabic_Category property value
+#
+# Field 1 is followed by a comment field, starting with the number sign '#',
+# which shows the General_Category property value, the Unicode character name
+# or names, and, in lines with ranges of code points, the code point count in
+# square brackets.
+#
+# The scripts assessed as Indic in the structural sense used for the
+# Indic_Syllabic_Category are the following:
+#
+# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid,
+# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati,
+# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi,
+# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu,
+# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek,
+# Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya,
+# Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo,
+# Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham,
+# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and
+# Zanabazar Square.
+#
+# All characters for all other scripts not in that list
+# take the default value for this property, unless they
+# are individually listed in this data file.
+#
+Property: Indic_Syllabic_Category
+Format: roozbehFile valueStyle=short skipValue=Other
+Value: Bindu
+# Bindu/Anusvara (nasalization or -n)
+
+# [Not derivable]
+Value: Visarga
+# Visarga (-h)
+# Excludes letters for jihvamuliya and upadhmaniya, which are
+#   related, but structured somewhat differently.
+
+# [Not derivable]
+Value: Avagraha
+# Avagraha (elision of initial a- in sandhi)
+
+# [Not derivable]
+Value: Nukta
+# Nukta (diacritic for borrowed consonants or other consonant
+# modifications). Note that while the resulting sound is typically a
+# consonant, the base letter a nukta follows may be an independent
+# vowel. For example, <U+0A85 GUJARATI LETTER A, U+0AFD GUJARATI
+# SIGN THREE-DOT NUKTA ABOVE> is used to transcribe ARABIC LETTER
+# AIN.
+
+# [Not derivable]
+Value: Virama
+# Virama (killing of inherent vowel in consonant sequence
+#         or consonant stacker)
+# Only includes characters that can act both as visible killer viramas
+# and consonant stackers. Separate property values exist for characters
+# that can only act as pure killers or only as consonant stackers.
+
+# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker)
+#  - (InSC=Number_Joiner) - 2D7F]
+Value: Pure_Killer
+# Pure killer (killing of inherent vowel in consonant sequence,
+#              with no consonant stacking behavior)
+
+# [Not derivable]
+Value: Invisible_Stacker
+# Invisible stacker (invisible consonant stacker virama).
+#
+# Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible
+# stacker may have a second function, changing the shape and/or location of the
+# consonant preceding it, even when there is no consonant following the
+# invisible stacker.
+
+# [Not derivable]
+Value: Vowel_Independent
+# Independent Vowels (contrasted with matras)
+
+# [Not derivable]
+Value: Vowel_Dependent
+# Dependent Vowels (contrasted with independent vowels and/or with
+# complex placement). Known as matras in Indic scripts. Also
+# includes vowel modifiers that follow dependent (and sometimes
+# independent) vowels.
+
+# [Not derivable]
+Value: Vowel
+# (Other) Vowels (reanalyzed as ordinary alphabetic letters or marks)
+
+# [Not derivable]
+Value: Consonant_Placeholder
+# Consonant Placeholder
+# This includes generic placeholders used for
+# Indic script layout (NBSP and dotted circle), as well as a few script-
+# specific vowel-holder characters which are not technically
+# consonants, but serve instead as bases for placement of vowel marks.
+
+# [Not derivable]
+Value: Consonant
+# Consonant (ordinary abugida consonants, with inherent vowels)
+
+# [Not derivable]
+Value: Consonant_Dead
+# Dead Consonant (special consonant with killed vowel)
+
+# [Not derivable]
+Value: Consonant_With_Stacker
+# Consonants that may make stacked ligatures with the next consonant
+# without the use of a virama
+
+# [Not derivable]
+Value: Consonant_Prefixed
+# Cluster-initial consonants
+
+# [Not derivable]
+Value: Consonant_Preceding_Repha
+# Repha Form of RA (reanalyzed in some scripts), when preceding the main
+# consonant.
+
+# [Not derivable]
+Value: Consonant_Initial_Postfixed
+# Consonants that succeed the main consonant in character sequences, but are
+# pronounced before it.
+
+# [Not derivable]
+Value: Consonant_Succeeding_Repha
+# Repha Form of RA (reanalyzed in some scripts), when succeeding the main
+# consonant.
+
+# [Not derivable]
+Value: Consonant_Subjoined
+# Subjoined Consonant (C2 form subtending a base consonant in Tibetan, etc.)
+
+# [Not derivable]
+Value: Consonant_Medial
+# Medial Consonant (medial liquid, occurring in clusters)
+
+# [Not derivable]
+Value: Consonant_Final
+# Final Consonant (special final forms which do not take vowels)
+
+# [Not derivable]
+Value: Consonant_Head_Letter
+# Head Letter (Tibetan)
+
+# [Not derivable]
+Value: Modifying_Letter
+# Reanalyzed letters not participating in the abugida structure, but
+# serving to modify the sound of an adjacent vowel or consonant.
+# Note that this is not the same as General_Category=Modifier_Letter.
+
+# [Not derivable]
+Value: Tone_Letter
+# Tone Letter (spacing lexical tone mark with status as a letter)
+
+# [Not derivable]
+Value: Tone_Mark
+# Tone Mark (nonspacing or spacing lexical tone mark)
+
+# [Not derivable]
+Value: Gemination_Mark
+# Gemination Mark (doubling of the preceding or following consonant)
+#
+# U+0A71 GURMUKHI ADDAK precedes the consonant it geminates, while the
+# others follow the consonant they geminate.
+
+# [Not derivable]
+Value: Cantillation_Mark
+# Cantillation Mark (recitation marks, such as svara markers for the Samaveda)
+
+# [Not derivable]
+Value: Register_Shifter
+# Register Shifter (shifts register for consonants, akin to a tone mark)
+
+# [Not derivable]
+Value: Syllable_Modifier
+# Syllable Modifier (miscellaneous combining characters that modify
+# something in the orthographic syllable they succeed or appear in)
+
+# [Not derivable]
+Value: Consonant_Killer
+# Consonant Killer (signifies that the previous consonant or consonants are
+# not pronounced)
+
+# [Not derivable]
+Value: Non_Joiner
+# Non_Joiner (Zero Width Non-Joiner)
+
+# [Not derivable]
+Value: Joiner
+# Joiner (Zero Width Joiner)
+
+# [Not derivable]
+Value: Number_Joiner
+# Number_Joiner (forms ligatures between numbers for multiplication)
+
+# [Not derivable]
+Value: Number
+# Number (can be used as vowel-holders like consonant placeholders)
+# Note: A number may even hold subjoined consonants which may in turn
+# have been formed using a virama or a stacker, e.g. the sequence
+# <U+1A93, U+1A60, U+1A34> where TAI THAM LETTER LOW TA is subjoined to
+# TAI THAM THAM DIGIT THREE using an invisible stacker.
+
+# [Not derivable]
+Value: Brahmi_Joining_Number
+# Brahmi Joining Number (may be joined by a Number_Joiner of the same
+# script, e.g. in Brahmi)
+#
+# Note: These are different from Numbers, in the way that there is no known
+# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. 
+# Until such evidence is found, implementations may assume that Brahmi
+# Joining Numbers only participate in shaping with other Brahmi Joining
+# Numbers.
+
+# [Not derivable]
+
 File:	UnicodeData
 Property: SPECIAL
 

From e0e2fc26cd5342de9c033f8729f4d7c182d335b3 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 3 Oct 2023 01:06:07 +0200
Subject: [PATCH 7/7] spotless

---
 .../java/org/unicode/text/UCD/MakeUnicodeFiles.java   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index d24e283c9..de6a5dea9 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -67,8 +67,7 @@ static class Format {
         Map<String, List<String>> fileToPropertySet = new TreeMap<String, List<String>>();
         Map<String, String> fileToComments = new TreeMap<String, String>();
         Map<String, String> fileToDirectory = new TreeMap<String, String>();
-        Map<String, List<String>> propertyToOrderedValues =
-                new TreeMap<String, List<String>>();
+        Map<String, List<String>> propertyToOrderedValues = new TreeMap<String, List<String>>();
         Map<String, Map<String, String>> propertyToValueToComments =
                 new TreeMap<String, Map<String, String>>();
         Map<String, String> hackMap = new HashMap<String, String>();
@@ -1214,7 +1213,10 @@ public static void generatePropertyFile(String filename) throws IOException {
                 }
                 pwProp.println(ps.roozbehFile ? "#" : "");
                 pwProp.println("#  All code points not explicitly listed for " + prop.getName());
-                pwProp.println("#  have the value " + v + (ps.roozbehFile && v.equals("NA") ? " (not applicable)." : "."));
+                pwProp.println(
+                        "#  have the value "
+                                + v
+                                + (ps.roozbehFile && v.equals("NA") ? " (not applicable)." : "."));
             }
 
             if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
@@ -1485,7 +1487,8 @@ private static void writeEnumeratedValues(
             bf.setMergeRanges(ps.mergeRanges);
             bf.setShowTotal(!ps.roozbehFile);
             if (ps.roozbehFile) {
-                bf.setRangeBreakSource(ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block"));
+                bf.setRangeBreakSource(
+                        ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block"));
             }
             bf.showSetNames(pw, s);
             if (DEBUG) {