diff --git a/docs/site/.gitignore b/docs/site/.gitignore index 8336c7f1d7d..9b916be5cff 100644 --- a/docs/site/.gitignore +++ b/docs/site/.gitignore @@ -3,3 +3,5 @@ /assets/vendor /sitemap.xml /sitemap.md +/sitemap.tsv + diff --git a/docs/site/assets/js/build.mjs b/docs/site/assets/js/build.mjs index c185a19c817..235c6973cc2 100644 --- a/docs/site/assets/js/build.mjs +++ b/docs/site/assets/js/build.mjs @@ -9,6 +9,8 @@ import { Readable } from "node:stream"; const SKIP_THESE = /(node_modules|\.jekyll-cache|^sitemap.*)/; +const SITE = "https://cldr.unicode.org"; + async function processFile(d, fullPath, out) { const f = await fs.readFile(fullPath, "utf-8"); const m = matter(f); @@ -42,11 +44,88 @@ async function traverse(d, out) { return Promise.all(promises); } +/** replace a/b/c.md with a/b */ +function path2dir(p) { + const dir = p.split("/").slice(0, -1).join("/"); + return dir; +} + +/** replace a/b/c.md with a/b/c.html */ +function md2html(p) { + return p.replace(/\.md$/, ".html"); +} + +/** replace a/b/c.html with a/b/c.md */ +function html2md(p) { + return p.replace(/\.html$/, ".md"); +} + /** replace a/b/c.md with a/b/c */ function dropmd(p) { return p.replace(/\.md$/, ""); } +function tabs(n) { + let s = []; + for (let i = 0; i < n; i++) { + s.push("\t"); + } + return s.join(""); +} + +function mkurl(p) { + return `${SITE}/${md2html(p)}`; +} + +const coll = new Intl.Collator(["und"]); + +function writeSiteMapSheet({ all, allDirs }, path, outsheet) { + // write my index + function indexForPath(p) { + if (p === "") { + p = "index.md"; + } else { + p = path2dir(p) + ".md"; + } + return all.findIndex(({ fullPath }) => fullPath === p); + } + const myIndex = indexForPath(path); + if (myIndex === -1) { + throw Error(`Could not find index for ${path}`); + } + const { title, fullPath: indexPath } = all[myIndex]; + // find out how much to indent. + // 'path' is '' or 'foo/' or 'foo/bar/baz/' at this point. 
+ const slashes = path.replace(/[^\/]+/g, ""); // foo/bar/ => // + const indent = tabs(slashes.length); // number of slashes => number of tabs + outsheet.push(`${indent}${title}\t${mkurl(indexPath)}`); + + // now, gather the children. + const children = all.filter(({ fullPath }) => { + if (fullPath === indexPath) return false; // no self-list. + const myDir = path2dir(fullPath); + // would this item be under our dir? + if (`${myDir}.md` === indexPath) return true; + // special case for odd /index subdir + if (indexPath === `index.md` && myDir === "") return true; + return false; + }); + + children.sort((a, b) => coll.compare(a.fullPath, b.fullPath)); + + children.forEach(({ title, fullPath }) => { + // if an index, recurse instead. + const baseName = dropmd(fullPath); // downloads.md -> downloads + if (allDirs.has(baseName)) { + // it's a non-leaf node, recurse. + writeSiteMapSheet({ all, allDirs }, `${baseName}/`, outsheet); + } else { + // write leaf (non-index) child pages + outsheet.push(`${indent}\t${title}\t${mkurl(fullPath)}`); + } + }); +} + async function writeSiteMaps(out) { // simple list of links const links = await Promise.all( @@ -58,15 +137,13 @@ async function writeSiteMaps(out) { }; }) ); - const stream = new SitemapStream({ hostname: "https://cldr.unicode.org" }); + const stream = new SitemapStream({ hostname: SITE }); const data = ( await streamToPromise(Readable.from(links).pipe(stream)) ).toString(); await fs.writeFile("./sitemap.xml", data, "utf-8"); - console.log("Wrote sitemap.xml"); + console.log(`Wrote sitemap.xml with ${links.length} entries`); - /* - const coll = new Intl.Collator(["und"]); const allSorted = [...out.all].sort((a, b) => coll.compare(a.fullPath, b.fullPath) ); @@ -82,7 +159,24 @@ async function writeSiteMaps(out) { "utf-8" ); console.log("Wrote sitemap.md"); - */ + + // now, create sitemap.tsv by walking + const outsheet = []; + const allPaths = out.all.map(({ fullPath }) => fullPath); + // Find all 'directories' (parent paths, stored 
without a trailing /) + const allDirs = new Set(); + allPaths.forEach((p) => { + const segs = p.split("/").slice(0, -1); // ['', 'dir1'] + for (let n = 0; n <= segs.length; n++) { + // add all parent paths, so: '', dir1, dir1/dir2 etc. + const subpath = segs.slice(0, n).join("/"); + allDirs.add(subpath); + } + }); + + writeSiteMapSheet({ all: out.all, allDirs }, "", outsheet); + await fs.writeFile("./sitemap.tsv", outsheet.join("\n"), "utf-8"); + console.log(`wrote sitemap.tsv with ${outsheet.length} entries`); } async function main() {