Skip to content

Commit

Permalink
increase bulk import size for links and create folder if it does not exist
Browse files Browse the repository at this point in the history
  • Loading branch information
Sacramentix committed May 21, 2024
1 parent e3d6c99 commit f8a38a6
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
8 changes: 4 additions & 4 deletions sql-dump-to-arango/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ async function parseAndLoadRedirect() {
}


if (count % 4096 == 0) {
if (count % 32_768 == 0) {
await previousBatchPromise;

const batch = nextBatch;
Expand All @@ -111,7 +111,7 @@ async function parseAndLoadRedirect() {
previousBatchPromise = insertRedirects(batch, redirect);
// previousBatchPromise = Promise.resolve() as Promise<any>;

if (count % 16_384 == 0) {
// Log progress once every 8 batches (every 262,144 rows). `%` and `*`
// share the same precedence and associate left-to-right, so the product
// must be parenthesized: unparenthesized, `count % 32_768*8` evaluates as
// `(count % 32_768) * 8`, which is zero on every batch boundary.
if (count % (32_768 * 8) == 0) {
    log(info.bytesRead, count);
}
}
Expand All @@ -124,7 +124,7 @@ async function parseAndLoadRedirect() {
nextBatch.push({_from:r[0],_to:id});

count++;
if (count % 4096 == 0) {
if (count % 32_768 == 0) {
await previousBatchPromise;

const batch = nextBatch;
Expand All @@ -133,7 +133,7 @@ async function parseAndLoadRedirect() {
previousBatchPromise = insertRedirects(batch, redirect);
// previousBatchPromise = Promise.resolve() as Promise<any>;

if (count % 16_384 == 0) {
// Log progress once every 8 batches (every 262,144 rows). `%` and `*`
// share the same precedence and associate left-to-right, so the product
// must be parenthesized: unparenthesized, `count % 32_768*8` evaluates as
// `(count % 32_768) * 8`, which is zero on every batch boundary.
if (count % (32_768 * 8) == 0) {
    log(info.bytesRead, count);
}
}
Expand Down
6 changes: 3 additions & 3 deletions sql-dump-to-arango/parser/dumpParser.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import { createWriteStream, existsSync, ReadStream } from "fs";
import { createWriteStream, existsSync } from "fs";
import { createGunzip, type Gunzip } from "node:zlib";
import { createReadStream } from "node:fs";
import { stat } from "node:fs/promises";
import { env } from "../env.js";
import { DecompressionStream } from "node:stream/web";
import { mkdir } from "fs/promises";

/** Kind of dump file; interpolated into the `wiki-latest-<fileType>.sql.gz` file name. */
export type FileType =
    | "page"
    | "redirect"
    | "pagelinks";
await sqlDumpStreamFromWeb("redirect");
export async function sqlDumpStreamFromCache(fileType:FileType) {
const path = `./cache/${env.WIKI_LANG}/${env.WIKI_LANG}wiki-latest-${fileType}.sql.gz`;
if (!existsSync(path)) {
// Ensure the per-language cache directory exists before opening the write
// stream. `recursive: true` already makes mkdir succeed when the directory
// is present, and a bare `.catch()` registers no rejection handler (it
// swallowed nothing), so it is omitted; genuine failures still reject here.
await mkdir(`./cache/${env.WIKI_LANG}`, { recursive: true });
const writeToFile = createWriteStream(path);
const response = await fetch(`https://dumps.wikimedia.org/${env.WIKI_LANG}wiki/latest/${env.WIKI_LANG}wiki-latest-${fileType}.sql.gz`);
const size = parseInt(response.headers.get("Content-Length") || "0");
Expand Down

0 comments on commit f8a38a6

Please sign in to comment.