From 3b03e6971d7948872f9b35fb1baab978f95833be Mon Sep 17 00:00:00 2001 From: MarvNC Date: Thu, 11 Jul 2024 21:16:33 -0700 Subject: [PATCH] Add -a flag to process all langs --- src/constants.ts | 2 ++ src/index.ts | 33 +++++++++++++++++++++++++++------ src/util/downloadDumps.ts | 10 ++++++++-- src/util/readArgs.ts | 10 +++++++--- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/constants.ts b/src/constants.ts index d1768c4..0fad892 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -2,6 +2,8 @@ import * as path from 'path'; import { getReadingFromDefinition } from './parse/readingParse'; import { pinyin } from 'pinyin-pro'; +export const DBPEDIA_DATE = '2022-12-01'; + export const LANGUAGE_CODES = [ 'hu', 'eu', diff --git a/src/index.ts b/src/index.ts index a907852..1688378 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,7 +5,12 @@ import { getVersion } from './util/getVersion'; import { downloadDumps } from './util/downloadDumps'; import { readArgs } from './util/readArgs'; import { readAndProcessLines } from './util/readAndProcessLines'; -import { WIKIPEDIA_ICON_FILEPATH } from './constants'; +import { + LanguageCode, + WIKIPEDIA_ICON_FILEPATH, + DBPEDIA_DATE, + LANGUAGE_CODES, +} from './constants'; const outputZipName = (lang: string, date: string, version: string) => `${lang} Wikipedia [${date}] (v${version}).zip`; @@ -17,8 +22,27 @@ const OUT_DIRECTORY = './out'; console.log(`Using version ${version}`); - const { lang, date } = readArgs(); + const { lang, date, all } = readArgs(); + if (!all) { + await processWikipediaDataForLang(version, dev, lang, date); + } else { + for (const lang of LANGUAGE_CODES) { + await processWikipediaDataForLang(version, dev, lang, DBPEDIA_DATE); + } + } + + process.exit(0); +})().catch((e) => { + console.error(e); +}); + +async function processWikipediaDataForLang( + version: string, + dev: boolean, + lang: LanguageCode, + date: string +) { console.log(`Converting ${lang} Wikipedia dump from ${date}...`); const filePath = await downloadDumps(lang, date); @@ -46,7 +70,4 @@ const OUT_DIRECTORY = './out'; await dict.export(OUT_DIRECTORY); console.log(`Exported to ${outputZipName(lang, date, version)}`); - process.exit(0); -})().catch((e) => { - console.error(e); -}); +} diff --git a/src/util/downloadDumps.ts b/src/util/downloadDumps.ts index ece6582..9b34846 100644 --- a/src/util/downloadDumps.ts +++ b/src/util/downloadDumps.ts @@ -1,5 +1,5 @@ import { $ } from 'bun'; -import { exists } from 'node:fs/promises'; +import { exists, mkdir } from 'node:fs/promises'; import { join } from 'path'; import { LanguageCode } from '../constants'; @@ -22,13 +22,17 @@ export async function downloadDumps(lang: LanguageCode, date: string) { // Check if download directory exists if (!(await exists(DOWNLOAD_DIR))) { - await $`mkdir ${DOWNLOAD_DIR}`; + console.log(`Creating download directory ${DOWNLOAD_DIR}`); + await mkdir(DOWNLOAD_DIR); } const archivePath = join(DOWNLOAD_DIR, ARCHIVE(lang)); const filePath = join(DOWNLOAD_DIR, FILE(lang)); const archiveExists = await exists(archivePath); const fileExists = await exists(filePath); + console.log( + `${lang}: archiveExists=${archiveExists}, fileExists=${fileExists}` + ); // Download the archive if neither the file nor archive exists if (!fileExists && !archiveExists) { @@ -43,5 +47,7 @@ export async function downloadDumps(lang: LanguageCode, date: string) { await $`bzip2 -dc ${archivePath} >${filePath}`; } + console.log(`Finished downloading and extracting ${lang} dump`); + return filePath; } diff --git a/src/util/readArgs.ts b/src/util/readArgs.ts index df34e42..d2cca64 100644 --- a/src/util/readArgs.ts +++ b/src/util/readArgs.ts @@ -12,6 +12,10 @@ export function readArgs() { type: 'string', short: 'd', }, + all: { + type: 'boolean', + short: 'a', + }, }, strict: true, allowPositionals: false, @@ -21,7 +25,7 @@ export function readArgs() { const dateInput = values.date as string; // Assert language is valid - if (!langInput || !LANGUAGE_CODES.includes(langInput)) { + if (langInput != null && !LANGUAGE_CODES.includes(langInput)) { throw new Error( `Language ${langInput} is not allowed or not provided. Allowed languages: ${LANGUAGE_CODES.join( ', ' @@ -30,11 +34,11 @@ export function readArgs() { } // Assert date is valid in format YYYY-MM-DD - if (!dateInput || !/^\d{4}-\d{2}-\d{2}$/.test(dateInput)) { + if (dateInput != null && !/^\d{4}-\d{2}-\d{2}$/.test(dateInput)) { throw new Error( `Date ${dateInput} is not valid or not provided. Format: YYYY-MM-DD` ); } - return { lang: langInput, date: dateInput }; + return { lang: langInput, date: dateInput, all: !!values.all }; }