From f8a38a6be5f3d1eaacaacd708a44d3f5df9d83cc Mon Sep 17 00:00:00 2001 From: Sacramenitx Date: Tue, 21 May 2024 13:30:42 +0200 Subject: [PATCH] increase bulk import size for links and create cache folder if it does not exist --- sql-dump-to-arango/index.ts | 8 ++++---- sql-dump-to-arango/parser/dumpParser.ts | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql-dump-to-arango/index.ts b/sql-dump-to-arango/index.ts index c2024a4..c06aaca 100644 --- a/sql-dump-to-arango/index.ts +++ b/sql-dump-to-arango/index.ts @@ -102,7 +102,7 @@ async function parseAndLoadRedirect() { } - if (count % 4096 == 0) { + if (count % 32_768 == 0) { await previousBatchPromise; const batch = nextBatch; @@ -111,7 +111,7 @@ async function parseAndLoadRedirect() { previousBatchPromise = insertRedirects(batch, redirect); // previousBatchPromise = Promise.resolve() as Promise; - if (count % 16_384 == 0) { + if (count % 32_768*8 == 0) { log(info.bytesRead, count); } } @@ -124,7 +124,7 @@ async function parseAndLoadRedirect() { nextBatch.push({_from:r[0],_to:id}); count++; - if (count % 4096 == 0) { + if (count % 32_768 == 0) { await previousBatchPromise; const batch = nextBatch; @@ -133,7 +133,7 @@ async function parseAndLoadRedirect() { previousBatchPromise = insertRedirects(batch, redirect); // previousBatchPromise = Promise.resolve() as Promise; - if (count % 16_384 == 0) { + if (count % 32_768*8 == 0) { log(info.bytesRead, count); } } diff --git a/sql-dump-to-arango/parser/dumpParser.ts b/sql-dump-to-arango/parser/dumpParser.ts index 7698e17..4559c99 100644 --- a/sql-dump-to-arango/parser/dumpParser.ts +++ b/sql-dump-to-arango/parser/dumpParser.ts @@ -1,15 +1,15 @@ -import { createWriteStream, existsSync, ReadStream } from "fs"; +import { createWriteStream, existsSync } from "fs"; import { createGunzip, type Gunzip } from "node:zlib"; import { createReadStream } from "node:fs"; import { stat } from "node:fs/promises"; import { env } from "../env.js"; -import { DecompressionStream } 
from "node:stream/web"; +import { mkdir } from "fs/promises"; export type FileType = "page" | "redirect" | "pagelinks"; -await sqlDumpStreamFromWeb("redirect"); export async function sqlDumpStreamFromCache(fileType:FileType) { const path = `./cache/${env.WIKI_LANG}/${env.WIKI_LANG}wiki-latest-${fileType}.sql.gz`; if (!existsSync(path)) { + await mkdir(`./cache/${env.WIKI_LANG}`, {recursive: true}).catch(); const writeToFile = createWriteStream(path); const response = await fetch(`https://dumps.wikimedia.org/${env.WIKI_LANG}wiki/latest/${env.WIKI_LANG}wiki-latest-${fileType}.sql.gz`); const size = parseInt(response.headers.get("Content-Length") || "0");