ivoa-std · msdemlei · Dec 18, 2024
diff --git a/convert.py b/convert.py
@@ -50,6 +50,7 @@
 # this is defined in Vocabularies in the VO 2
 KNOWN_PREDICATES = frozenset([
     "ivoasem:preliminary", "ivoasem:deprecated", "ivoasem:useInstead",
+    "ivoasem:UCDSyntaxCode",
     "rdfs:subClassOf",
     "rdfs:subPropertyOf",
     "skos:broader", "skos:exactMatch",
@@ -62,7 +63,7 @@
 FULL_TERM_PATTERN = "[\w\d#:/_.*%-]+"
 
 # an RE our terms themselves must match
-TERM_PATTERN = "[\w\d_-]+"
+TERM_PATTERN = "[\w\d_.-]+"
 
 IVOA_RDF_URI = "http://www.ivoa.net/rdf/"
 
@@ -650,10 +651,16 @@ def _parse_relations(self, relations):
         """
         for predicate, obj in self._iter_relationship_literals(relations):
             # a little hack: URI-fy plain objects by making them part of
-            # the current vocabulary
+            # the current vocabulary; use a backquote (*almost* LISP,
+            # but we don't want to confuse CSV format sniffers) to
+            # suppress that; of course, we need to hex away that backquote
+            # again.
             if obj and re.match(TERM_PATTERN+"$", obj):
                 obj = "#"+obj
 
+            if obj.startswith("`"):
+                obj = obj[1:]
+
             self._add_relation(predicate, obj)
 
     def get_objects_for(self, predicate):
@@ -721,7 +728,9 @@ def _format_more_relations(self):
                ("ivoasem:deprecated", "Deprecated Term"),
                ("skos:exactMatch", "Same As"),
                ("skos:related", "Related"),
-               ("built-in:narrower", "Narrower")]:
+               ("built-in:narrower", "Narrower"),
+               ("ivoasem:UCDSyntaxCode", "UCD Syntax"),
+            ]:
 
             if prop=="built-in:narrower":
                 objs = [self._format_term_as_html(t)

diff --git a/ucd/migration/README b/ucd/migration/README
@@ -0,0 +1,8 @@
+The UCD vocabulary was migrated in 202? from the then-current ASCII
+UCD list, published back then in an endorsed note, <TODO>.
+
+This is the tooling that did the migration.  The command line is
+
+python convert_list_to_csv.py > ../terms.csv
+
+You do not want to repeat this after the initial migration.
diff --git a/ucd/migration/convert_list_to_csv.py b/ucd/migration/convert_list_to_csv.py
@@ -0,0 +1,30 @@
+"""
+Read ucd-list.txt, as taken from the UCDList EN, and make a CSV for
+the vocabulary tooling out of it.
+
+This is based on the parsing script in the EN source.
+"""
+
+
+def convert_line(line):
+    """return the stripped fields of a |-separated UCD list line.
+
+    For a line made up of exactly three |-separated fields, this is a
+    list of these fields with surrounding whitespace removed.
+
+    For anything else, None is returned: what does not parse here was
+    not in the normative EN list either and is meant to be skipped.
+    """
+    fields = line.split("|")
+    if len(fields) != 3:
+        return None
+    return [field.strip() for field in fields]
+
+
+def ucd_to_csv(in_file):
+    """print a vocabulary CSV line for each UCD word found in in_file.
+
+    in_file is an iterable of lines, each of the form
+    syntax code | word | description.  Lines starting with # are
+    skipped, and so are lines convert_line cannot parse (it returns
+    None for them), e.g., empty lines.
+
+    The output goes to stdout, one semicolon-separated line per word:
+    the word, a literal 1, the word again, the double-quoted description,
+    and an ivoasem:UCDSyntaxCode relation carrying the backquote-prefixed
+    syntax code.
+
+    An AssertionError is raised if a description contains a double
+    quote.
+    """
+    for cur_line in in_file:
+        if cur_line.startswith("#"):
+            continue
+
+        parsed = convert_line(cur_line)
+        if parsed is None:
+            # not a word definition; trying to unpack the None
+            # would raise a TypeError.
+            continue
+
+        syncode, word, description = parsed
+        # the description is quoted by hand below, so an embedded
+        # double quote would yield broken CSV.
+        assert '"' not in description
+        print(f"{word};1;{word};\"{description}\";"
+            f"ivoasem:UCDSyntaxCode(`{syncode})")
+
+if __name__ == '__main__':
+    # the input is expected in the current directory; the CSV goes
+    # to stdout.
+    with open("ucd-list.txt", 'r') as ucd_list:
+        ucd_to_csv(ucd_list)