Skip to content

Commit

Permalink
Add -a flag to process all langs
Browse files: browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jul 12, 2024
1 parent 6288be5 commit 3b03e69
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 11 deletions.
2 changes: 2 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import * as path from 'path';
import { getReadingFromDefinition } from './parse/readingParse';
import { pinyin } from 'pinyin-pro';

export const DBPEDIA_DATE = '2022-12-01';

export const LANGUAGE_CODES = [
'hu',
'eu',
Expand Down
33 changes: 27 additions & 6 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import { getVersion } from './util/getVersion';
import { downloadDumps } from './util/downloadDumps';
import { readArgs } from './util/readArgs';
import { readAndProcessLines } from './util/readAndProcessLines';
import { WIKIPEDIA_ICON_FILEPATH } from './constants';
import {
LanguageCode,
WIKIPEDIA_ICON_FILEPATH,
DBPEDIA_DATE,
LANGUAGE_CODES,
} from './constants';

const outputZipName = (lang: string, date: string, version: string) =>
`${lang} Wikipedia [${date}] (v${version}).zip`;
Expand All @@ -17,8 +22,27 @@ const OUT_DIRECTORY = './out';

console.log(`Using version ${version}`);

const { lang, date } = readArgs();
const { lang, date, all } = readArgs();

if (!all) {
await processWikipediaDataForLang(version, dev, lang, date);
} else {
for (const lang of LANGUAGE_CODES) {
await processWikipediaDataForLang(version, dev, lang, DBPEDIA_DATE);
}
}

process.exit(0);
})().catch((e) => {
console.error(e);
});

async function processWikipediaDataForLang(
version: string,
dev: boolean,
lang: LanguageCode,
date: string
) {
console.log(`Converting ${lang} Wikipedia dump from ${date}...`);

const filePath = await downloadDumps(lang, date);
Expand Down Expand Up @@ -46,7 +70,4 @@ const OUT_DIRECTORY = './out';

await dict.export(OUT_DIRECTORY);
console.log(`Exported to ${outputZipName(lang, date, version)}`);
process.exit(0);
})().catch((e) => {
console.error(e);
});
}
10 changes: 8 additions & 2 deletions src/util/downloadDumps.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { $ } from 'bun';
import { exists } from 'node:fs/promises';
import { exists, mkdir } from 'node:fs/promises';
import { join } from 'path';
import { LanguageCode } from '../constants';

Expand All @@ -22,13 +22,17 @@ export async function downloadDumps(lang: LanguageCode, date: string) {

// Check if download directory exists
if (!(await exists(DOWNLOAD_DIR))) {
await $`mkdir ${DOWNLOAD_DIR}`;
console.log(`Creating download directory ${DOWNLOAD_DIR}`);
await mkdir(DOWNLOAD_DIR);
}

const archivePath = join(DOWNLOAD_DIR, ARCHIVE(lang));
const filePath = join(DOWNLOAD_DIR, FILE(lang));
const archiveExists = await exists(archivePath);
const fileExists = await exists(filePath);
console.log(
`${lang}: archiveExists=${archiveExists}, fileExists=${fileExists}`
);

// Download the archive if neither the file nor archive exists
if (!fileExists && !archiveExists) {
Expand All @@ -43,5 +47,7 @@ export async function downloadDumps(lang: LanguageCode, date: string) {
await $`bzip2 -dc ${archivePath} >${filePath}`;
}

console.log(`Finished downloading and extracting ${lang} dump`);

return filePath;
}
10 changes: 7 additions & 3 deletions src/util/readArgs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ export function readArgs() {
type: 'string',
short: 'd',
},
all: {
type: 'boolean',
short: 'a',
},
},
strict: true,
allowPositionals: false,
Expand All @@ -21,7 +25,7 @@ export function readArgs() {
const dateInput = values.date as string;

// Assert language is valid
if (!langInput || !LANGUAGE_CODES.includes(langInput)) {
if (langInput != null && !LANGUAGE_CODES.includes(langInput)) {
throw new Error(
`Language ${langInput} is not allowed or not provided. Allowed languages: ${LANGUAGE_CODES.join(
', '
Expand All @@ -30,11 +34,11 @@ export function readArgs() {
}

// Assert date is valid in format YYYY-MM-DD
if (!dateInput || !/^\d{4}-\d{2}-\d{2}$/.test(dateInput)) {
if (dateInput != null && !/^\d{4}-\d{2}-\d{2}$/.test(dateInput)) {
throw new Error(
`Date ${dateInput} is not valid or not provided. Format: YYYY-MM-DD`
);
}

return { lang: langInput, date: dateInput };
return { lang: langInput, date: dateInput, all: !!values.all };
}

0 comments on commit 3b03e69

Please sign in to comment.