From 90b987a656d1d9c49879b9b11de4f29201dfc820 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 10 Sep 2024 18:56:57 -0500 Subject: [PATCH] CLDR-17935 site: add crawler sitemap (#4028) --- docs/site/.gitignore | 2 ++ docs/site/_layouts/page.html | 4 +-- docs/site/assets/js/build.mjs | 49 ++++++++++++++++++++++++++++++++++- docs/site/package-lock.json | 42 ++++++++++++++++++++++++++++++ docs/site/package.json | 1 + docs/site/robots.txt | 1 + 6 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 docs/site/robots.txt diff --git a/docs/site/.gitignore b/docs/site/.gitignore index f72d4052c50..8336c7f1d7d 100644 --- a/docs/site/.gitignore +++ b/docs/site/.gitignore @@ -1,3 +1,5 @@ /node_modules /assets/json /assets/vendor +/sitemap.xml +/sitemap.md diff --git a/docs/site/_layouts/page.html b/docs/site/_layouts/page.html index d8976da6256..857db712665 100644 --- a/docs/site/_layouts/page.html +++ b/docs/site/_layouts/page.html @@ -17,9 +17,7 @@
This navigation UI is temporary, just to give access to the pages.
- - + diff --git a/docs/site/assets/js/build.mjs b/docs/site/assets/js/build.mjs index 899911350bd..c185a19c817 100644 --- a/docs/site/assets/js/build.mjs +++ b/docs/site/assets/js/build.mjs @@ -4,8 +4,10 @@ import * as fs from "node:fs/promises"; import * as path from "node:path"; import { default as process } from "node:process"; import { default as matter } from "gray-matter"; +import { SitemapStream, streamToPromise } from "sitemap"; +import { Readable } from "node:stream"; -const SKIP_THESE = /(node_modules|\.jekyll-cache)/; +const SKIP_THESE = /(node_modules|\.jekyll-cache|^sitemap.*)/; async function processFile(d, fullPath, out) { const f = await fs.readFile(fullPath, "utf-8"); @@ -40,6 +42,49 @@ async function traverse(d, out) { return Promise.all(promises); } +/** replace a/b/c.md with a/b/c */ +function dropmd(p) { + return p.replace(/\.md$/, ""); +} + +async function writeSiteMaps(out) { + // simple list of links + const links = await Promise.all( + out.all.map(async ({ fullPath, title }) => { + const stat = await fs.stat(fullPath); + return { + url: dropmd(`/${fullPath}`), + lastmod: stat.mtime.toISOString(), + }; + }) + ); + const stream = new SitemapStream({ hostname: "https://cldr.unicode.org" }); + const data = ( + await streamToPromise(Readable.from(links).pipe(stream)) + ).toString(); + await fs.writeFile("./sitemap.xml", data, "utf-8"); + console.log("Wrote sitemap.xml"); + + /* + const coll = new Intl.Collator(["und"]); + const allSorted = [...out.all].sort((a, b) => + coll.compare(a.fullPath, b.fullPath) + ); + await fs.writeFile( + "./sitemap.md", + `---\ntitle: Site Map\n---\n\n` + + allSorted + .map( + ({ fullPath, title }) => + `- [/${fullPath}](/${dropmd(fullPath)}) - ${title}` + ) + .join("\n"), + "utf-8" + ); + console.log("Wrote sitemap.md"); + */ +} + async function main() { const out = { all: [], @@ -48,6 +93,8 @@ async function main() { await fs.mkdir("assets/json/", { recursive: true }); await traverse(".", out); await fs.writeFile("assets/json/tree.json", JSON.stringify(out, null, " ")); + console.log("Wrote assets/json/tree.json"); + await writeSiteMaps(out); } main().then( diff --git a/docs/site/package-lock.json b/docs/site/package-lock.json index 01a0940a8a0..74410ef94e3 100644 --- a/docs/site/package-lock.json +++ b/docs/site/package-lock.json @@ -11,6 +11,7 @@ "license": "Unicode-3.0", "dependencies": { "gray-matter": "^4.0.3", + "sitemap": "^8.0.0", "vue": "^3.5.0" } }, @@ -62,6 +63,19 @@ "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==" }, + "node_modules/@types/node": { + "version": "17.0.45", + "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz", + "integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw==" + }, + "node_modules/@types/sax": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz", + "integrity": "sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@vue/compiler-core": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.0.tgz", @@ -153,6 +167,11 @@ "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.0.tgz", "integrity": "sha512-m9IgiteBpCkFaMNwCOBkFksA7z8QiKc30ooRuoXWUFRDu0mGyNPlFHmbncF0/Kra1RlX8QrmBbRaIxVvikaR0Q==" }, + "node_modules/arg": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", + "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==" + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -304,6 +323,11 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/sax": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.1.tgz", + "integrity": "sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==" + }, "node_modules/section-matter": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/section-matter/-/section-matter-1.0.0.tgz", @@ -316,6 +340,24 @@ "node": ">=4" } }, + "node_modules/sitemap": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/sitemap/-/sitemap-8.0.0.tgz", + "integrity": "sha512-+AbdxhM9kJsHtruUF39bwS/B0Fytw6Fr1o4ZAIAEqA6cke2xcoO2GleBw9Zw7nRzILVEgz7zBM5GiTJjie1G9A==", + "dependencies": { + "@types/node": "^17.0.5", + "@types/sax": "^1.2.1", + "arg": "^5.0.0", + "sax": "^1.2.4" + }, + "bin": { + "sitemap": "dist/cli.js" + }, + "engines": { + "node": ">=14.0.0", + "npm": ">=6.0.0" + } + }, "node_modules/source-map-js": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", diff --git a/docs/site/package.json b/docs/site/package.json index 3eac2b7b0ac..a347f5258d4 100644 --- a/docs/site/package.json +++ b/docs/site/package.json @@ -14,6 +14,7 @@ "private": true, "dependencies": { "gray-matter": "^4.0.3", + "sitemap": "^8.0.0", "vue": "^3.5.0" } } diff --git a/docs/site/robots.txt b/docs/site/robots.txt new file mode 100644 index 00000000000..a364c8e95e6 --- /dev/null +++ b/docs/site/robots.txt @@ -0,0 +1 @@ +Sitemap: https://cldr.unicode.org/sitemap.xml