From 0e12e314204c48ec9930bf5dfa9c1de6f0b1636b Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 27 Nov 2024 16:27:32 -0800 Subject: [PATCH] script: publish early UCD snapshot --- docs/data-workflow.md | 22 +++++++++++++++ pub/copy-ucd-to-draft.sh | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100755 pub/copy-ucd-to-draft.sh diff --git a/docs/data-workflow.md b/docs/data-workflow.md index 102ab8433..01cd0a68f 100644 --- a/docs/data-workflow.md +++ b/docs/data-workflow.md @@ -91,6 +91,28 @@ Make sure to publish exactly the intended set of files. Skip the NamesList.txt and Unihan data files (see above), and skip any others that are only for internal use. +### Publish a UCD snapshot + +When we have a usable snapshot of the UCD for new repertoire, we should publish the data files, +so that the Charts WG and others can pick them up for their work. +As a pre-alpha snapshot, we do not yet publish security/IDNA/emoji files. + +Review/edit the pub/*.sh scripts and advance the version numbers and copyright years. + +Run the [pub/copy-ucd-to-draft.sh](https://github.com/unicode-org/unicodetools/blob/main/pub/copy-ucd-to-draft.sh) +script from an up-to-date repo workspace. +The script copies the set of the .../dev/ data files for an alpha snapshot +from a unicodetools workspace to a target folder with the layout of https://www.unicode.org/Public/draft/ . + +Send the resulting zip file to Rick for posting to https://www.unicode.org/Public/draft/ . +Ask Rick to add other files that are not tracked in the unicodetools repo: +* Unihan.zip to .../draft/UCD/ucd + +TODO: Figure out new process & people replacing Rick in 2025. + +Note: No version/delta infixes in names of data files. +We simply use the “draft” folder and the file-internal time stamps for versioning. + ### Publish an alpha snapshot For the alpha review, publish (at least) the UCD and emoji files, and the charts. 
diff --git a/pub/copy-ucd-to-draft.sh b/pub/copy-ucd-to-draft.sh
new file mode 100755
index 000000000..041cd042c
--- /dev/null
+++ b/pub/copy-ucd-to-draft.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+#
+# Script for
+# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-a-ucd-snapshot
+#
+# Copies the UCD .../dev/ data files from a unicodetools workspace into a
+# target folder with the layout of https://www.unicode.org/Public/draft/ ,
+# fills in the placeholders in the ReadMe files, and zips the result.
+#
+# Invoke like this:
+#
+# pub/copy-ucd-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft
+#
+# Arguments:
+#   $1  root of the unicodetools workspace (the folder containing unicodetools/data)
+#   $2  target draft folder; created if it does not exist
+#
+# Requires bash, zip, and GNU sed (for "sed -i" without a backup suffix).
+
+set -euo pipefail
+
+usage() {
+  printf 'Usage: %s <unicodetools-workspace> <draft-folder>\n' "${0##*/}" >&2
+  exit 2
+}
+
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# Refuse to run without both paths: with an empty draft folder the commands
+# below would operate on "/" and on the current directory.
+[[ $# -ge 2 ]] || usage
+[[ -n "$1" && -n "$2" ]] || usage
+
+UNICODETOOLS=$1
+DRAFT=$2
+
+UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data
+
+# Adjust the following for each year and version as needed.
+COPY_YEAR=2024
+UNI_VER=17.0.0
+EMOJI_VER=17.0
+
+# Same YYYY-MM-DD output as GNU "date --iso-8601", without needing GNU date.
+TODAY=$(date +%F)
+
+[[ -d "$UNITOOLS_DATA/ucd/dev" ]] || die "Not a directory: $UNITOOLS_DATA/ucd/dev"
+command -v zip > /dev/null || die "zip is required but was not found in PATH"
+
+mkdir -p -- "$DRAFT"
+# Make the target absolute so that it stays valid inside the "cd" subshells below.
+DRAFT=$(CDPATH='' cd -- "$DRAFT" && pwd)
+
+# Keep the sed script outside of $DRAFT so that it cannot end up in a zip file;
+# the trap removes it on every exit path.
+SED_README=$(mktemp)
+trap 'rm -f -- "$SED_README"' EXIT
+
+cat > "$SED_README" << eof
+s/COPY_YEAR/$COPY_YEAR/
+s/PUB_DATE/$TODAY/
+s/PUB_STATUS/draft/
+s/UNI_VER/$UNI_VER/
+s/EMOJI_VER/$EMOJI_VER/
+s%PUBLIC_EMOJI%Public/draft/emoji%
+s%PUBLIC_UCD%Public/draft/UCD%
+eof
+
+mkdir -p -- "$DRAFT/UCD/ucd" "$DRAFT/zipped"
+cp -r -- "$UNITOOLS_DATA"/ucd/dev/* "$DRAFT/UCD/ucd"
+# Drop the Unihan and emoji folders from the copy;
+# -f keeps this from failing when a folder is absent.
+rm -rf -- "$DRAFT/UCD/ucd/Unihan" "$DRAFT/UCD/ucd/emoji"
+mv -- "$DRAFT/UCD/ucd/version-ReadMe.txt" "$DRAFT/UCD/ReadMe.txt"
+mv -- "$DRAFT/UCD/ucd/zipped-ReadMe.txt" "$DRAFT/zipped/ReadMe.txt"
+
+# Fix permissions. Everyone can read, and search directories.
+chmod -R a+rX -- "$DRAFT"
+
+# Update the readmes in-place (-i) as set up above.
+# "-exec ... {} +" copes with unusual file names and runs nothing when there is no match.
+find "$DRAFT" -name '*ReadMe.txt' -exec sed -i -f "$SED_README" -- {} +
+
+# Zip files for some types of data, after fixing permissions.
+# "&&" after cd: never zip in the wrong folder if the cd fails.
+rm -f -- "$DRAFT/UCD/ucd/UCD.zip"
+(cd -- "$DRAFT/UCD/ucd" && zip -r UCD.zip * && mv -- UCD.zip "$DRAFT/zipped")
+
+rm -f -- "$DRAFT/ucd-snapshot.zip"
+(cd -- "$DRAFT" && zip -r ucd-snapshot.zip *)
+
+echo "--------------------"
+echo "Copy files from elsewhere:"
+echo "- Unihan.zip to $DRAFT/UCD/ucd"
+echo "- Unihan.zip to $DRAFT/zipped"