Skip to content

Commit

Permalink
Add remaining Unicode properties
Browse files Browse the repository at this point in the history
  • Loading branch information
ota-meshi committed Oct 27, 2024
1 parent cc5ec01 commit a54a27e
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module.exports = {
},
overrides: [
{
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js"],
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js", "bin/generate-unicode-script-values.js"],
rules: {
"no-console": "off"
}
Expand Down
2 changes: 2 additions & 0 deletions acorn/src/generated/scriptValuesAddedInUnicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// This file was generated by "bin/generate-unicode-script-values.js". Do not modify manually!
export default "Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz"
3 changes: 2 additions & 1 deletion acorn/src/unicode-property-data.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {wordsRegexp} from "./util.js"
import scriptValuesAddedInUnicode from "./generated/scriptValuesAddedInUnicode.js"

// This file contains Unicode properties extracted from the ECMAScript specification.
// The lists are extracted like so:
Expand Down Expand Up @@ -42,7 +43,7 @@ const ecma10ScriptValues = ecma9ScriptValues + " Dogra Dogr Gunjala_Gondi Gong H
const ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho"
const ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi"
const ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith"
const ecma14ScriptValues = ecma13ScriptValues + " Hrkt Katakana_Or_Hiragana Kawi Nag_Mundari Nagm Unknown Zzzz"
const ecma14ScriptValues = ecma13ScriptValues + " " + scriptValuesAddedInUnicode

const unicodeScriptValues = {
9: ecma9ScriptValues,
Expand Down
59 changes: 59 additions & 0 deletions bin/generate-unicode-script-values.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"use strict"

const fs = require("fs")
const path = require("path")

/**
 * Regenerates "acorn/src/generated/scriptValuesAddedInUnicode.js".
 *
 * Loads the Script value matcher that acorn uses for ES2022 (index 13 of the
 * Unicode property data), collects every Script value yielded by
 * getLatestUnicodeScriptValues(), and writes out the values that the ES2022
 * matcher does not accept, sorted and joined with single spaces.
 *
 * @returns {Promise<void>}
 * @throws {Error} if no new script values were found, so that a broken
 * download cannot silently replace the generated file with an empty list.
 */
async function main() {
  const m = await import("../acorn/src/unicode-property-data.js")
  const reScriptValuesAddedInES = m.default[13].nonBinary.Script

  // A Set drops values that are reported more than once.
  const scriptValues = new Set()
  for await (const value of getLatestUnicodeScriptValues()) {
    scriptValues.add(value)
  }

  // Up to ES2022 the script values were enumerated for each ECMAScript edition;
  // from ES2023 on they follow the Unicode Standard instead.
  // The generated file therefore lists every Unicode script value except
  // those already listed for ES2022 and earlier.
  const newValues = [...scriptValues]
    .filter(value => !reScriptValuesAddedInES.test(value))
    .sort()

  if (newValues.length === 0) {
    throw new Error("No new Unicode script values were found; refusing to overwrite the generated file.")
  }

  const scriptValuesAddedInUnicode = "export default " + JSON.stringify(newValues.join(" "))
  writeGeneratedFile("scriptValuesAddedInUnicode", scriptValuesAddedInUnicode)

  console.log("Done. The generated files must be committed.")
}

main().catch(error => {
  // Report the failure and make the npm script exit with a non-zero status.
  console.error(error)
  process.exitCode = 1
})

/**
 * Writes a generated source file into "acorn/src/generated".
 *
 * The file starts with a one-line "generated by" comment naming this script,
 * followed by `content` and a trailing newline.
 *
 * @param {string} filename Base name of the file to write, without the ".js" extension.
 * @param {string} content Source text to place after the header comment.
 */
function writeGeneratedFile(filename, content) {
  const comment = "// This file was generated by \"bin/" + path.basename(__filename) + "\". Do not modify manually!"
  // Resolve against this script's directory rather than the current working
  // directory, so the output lands in the repository no matter where the
  // script is started from.
  const target = path.resolve(__dirname, "../acorn/src/generated", filename + ".js")
  fs.writeFileSync(target, comment + "\n" + content + "\n", "utf8")
}

/**
 * Yields all Unicode script values from the latest PropertyValueAliases.txt.
 *
 * Only records whose first field is `sc` are used. For each such record the
 * third field is yielded first, then the second field, then any further
 * fields. Empty or missing fields are skipped. No de-duplication is done here.
 *
 * @returns {AsyncGenerator<string>} The script values, in file order.
 * @throws {Error} if the download does not complete with a successful HTTP status.
 */
async function * getLatestUnicodeScriptValues() {
  const url = "https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt"
  const response = await fetch(url)
  if (!response.ok) {
    // Without this check an error page would be parsed as data and yield nothing.
    throw new Error("Failed to download " + url + ": HTTP " + response.status + " " + response.statusText)
  }

  const lines = (await response.text()).split("\n")
  for (const line of lines) {
    if (!line || line.startsWith("#")) {
      continue
    }
    const [propertyAlias, alias, canonical, ...remaining] = line
      .split("#")[0] // strip comments
      .split(";") // split by semicolon
      .map((x) => x.trim()) // trim

    if (propertyAlias !== "sc") {
      continue
    }

    for (const value of [canonical, alias, ...remaining]) {
      // A short or malformed record leaves fields undefined or empty; skip those.
      if (value) {
        yield value
      }
    }
  }
}
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
"build:loose": "rollup -c acorn-loose/rollup.config.mjs",
"build:main": "rollup -c acorn/rollup.config.mjs",
"build:walk": "rollup -c acorn-walk/rollup.config.mjs",
"generate": "node bin/generate-identifier-regex.js",
"generate": "npm run generate:identifier-regex && npm run generate:unicode-script-values",
"generate:identifier-regex": "node bin/generate-identifier-regex.js",
"generate:unicode-script-values": "node bin/generate-unicode-script-values.js",
"lint": "eslint .",
"prepare": "npm run test",
"pretest": "npm run build:main && npm run build:loose",
Expand Down

0 comments on commit a54a27e

Please sign in to comment.