-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
156 lines (134 loc) · 3.93 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import fs, { readdirSync } from "fs";
import path from "path";
import readline from "readline";
// Running counters used by splitFile: the index of the split file currently
// being written and the index of the chunk directory that file is placed in.
let fileCounter = 0;
let chunkCounter = 0;

// Tunable options for the external sort.
const settings = {
  // Working directory that holds the temporary chunk folders.
  dir: "temp",
  // Input file to sort.
  filename: "ex",
  // Name of the sorted result file.
  outfile: "sorted",
  // Directory the sorted result file is placed in.
  outdir: "out",
  // Maximum size of one split file, in MB. With --max-old-space-size=500 keep
  // it below 80 to avoid running out of heap memory.
  maxFileMb: 80,
  // Maximum number of split files per chunk directory. Author's note: keep it
  // under 1000, otherwise readdirSync will not be able to list them.
  maxFilesInDir: 500,
};
// Paste your own sorting algorithm / compare function here.
/**
 * Orders two lines by their numeric value.
 *
 * @param {string} a - First line.
 * @param {string} b - Second line.
 * @returns {number} 1 when `a` is numerically greater than `b`, -1 when it is
 *   smaller, and 0 otherwise (equal values, or values that do not convert to
 *   comparable numbers).
 */
function compare(a, b) {
  const diff = Number(a) - Number(b);
  if (diff > 0) return 1;
  if (diff < 0) return -1;
  // Equal values must yield 0: a comparator that answers -1 for both
  // compare(x, y) and compare(y, x) is inconsistent for Array.prototype.sort.
  return 0;
}
/**
 * Splits the input file, line by line, into size-limited files named
 * `${settings.dir}/chunk<chunkCounter>/<fileCounter>`.
 *
 * Once the current split file grows beyond `settings.maxFileMb` megabytes the
 * file counter is advanced; every `settings.maxFilesInDir` files a new chunk
 * directory is started. The module-level `fileCounter` and `chunkCounter`
 * are updated as a side effect.
 *
 * @param {string} path - Path of the file to split. Note that inside this
 *   function the name shadows the imported `path` module, which is not used
 *   here.
 * @returns {Promise<void>} Resolves when the whole input has been written out.
 */
async function splitFile(path) {
  const maxBytes = settings.maxFileMb * 1024 * 1024;
  const currentTarget = () =>
    `${settings.dir}/chunk${chunkCounter}/${fileCounter}`;
  // One stat per split file (instead of one per line) so a pre-existing file
  // still counts towards the size limit.
  const sizeOnDisk = (file) =>
    fs.existsSync(file) ? fs.statSync(file).size : 0;

  const rl = readline.createInterface({
    input: fs.createReadStream(path),
  });
  let target = currentTarget();
  let written = sizeOnDisk(target);
  try {
    for await (const line of rl) {
      const record = `${line}\n`;
      // appendFileSync is synchronous and throws on failure; it takes no callback.
      fs.appendFileSync(target, record);
      written += Buffer.byteLength(record);
      if (written <= maxBytes) continue;

      // The current file is full: move on to the next one.
      fileCounter++;
      if (fileCounter % settings.maxFilesInDir === 0) {
        chunkCounter++;
        fs.mkdirSync(`${settings.dir}/chunk${chunkCounter}`, {
          recursive: true,
        });
      }
      target = currentTarget();
      written = sizeOnDisk(target);
    }
  } finally {
    rl.close();
  }
}
/**
 * Lists every entry found one level below `dir` (i.e. `dir/<subdir>/<entry>`)
 * in a deterministic order.
 *
 * Shorter paths come first; paths of equal length are ordered
 * lexicographically, which for all-digit file names of the same length is
 * the same as numeric order. Entries directly inside `dir` that are not
 * directories are skipped instead of making readdirSync throw.
 *
 * @param {string} dir - Directory containing the chunk sub-directories.
 * @returns {string[]} Sorted list of `dir/<subdir>/<entry>` paths.
 */
const updatePaths = (dir) => {
  const paths = [];
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;
    for (const file of readdirSync(`${dir}/${entry.name}`)) {
      paths.push(`${dir}/${entry.name}/${file}`);
    }
  }
  return paths.sort((a, b) => {
    if (a.length !== b.length) return a.length - b.length;
    // Compare the whole path, not only its last character, so the order is
    // total and stable between calls.
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  });
};
/**
 * Merges two already-sorted files, line by line, into a newly created file.
 *
 * The output name is `outfile` followed by a numeric timestamp. The file is
 * created exclusively, and the timestamp is bumped until an unused name is
 * found, so two merges finishing within the same millisecond can never write
 * into the same file (or into a file that is currently being read).
 * An input file that does not exist is created empty. The output file is
 * always created, even when both inputs are empty.
 *
 * @param {string} file1 - Path of the first sorted input file.
 * @param {string} file2 - Path of the second sorted input file.
 * @param {string} outfile - Path prefix of the merged file.
 * @returns {Promise<string>} Path of the merged file that was written.
 */
async function externalMerge(file1, file2, outfile) {
  for (const input of [file1, file2]) {
    if (!fs.existsSync(input)) {
      fs.writeFileSync(input, "");
    }
  }

  // Open the output exclusively ("wx") and keep the descriptor, so the file
  // is opened once rather than once per merged line.
  let stamp = Date.now();
  let target;
  let fd;
  for (;;) {
    target = `${outfile}${stamp}`;
    try {
      fd = fs.openSync(target, "wx");
      break;
    } catch (err) {
      if (err.code !== "EEXIST") throw err;
      stamp += 1;
    }
  }

  const streamMain = fs.createReadStream(file1);
  const streamTemp = fs.createReadStream(file2);
  const readMain = readline.createInterface({ input: streamMain });
  const readTemp = readline.createInterface({ input: streamTemp });
  const emit = (line) => fs.appendFileSync(fd, `${line}\n`);

  try {
    // Both iterators are created before the first await so neither reader
    // emits lines while nobody is listening.
    const iterMain = readMain[Symbol.asyncIterator]();
    const iterTemp = readTemp[Symbol.asyncIterator]();
    let headMain = await iterMain.next();
    let headTemp = await iterTemp.next();

    while (!headMain.done && !headTemp.done) {
      if (compare(headMain.value, headTemp.value) > 0) {
        emit(headTemp.value);
        headTemp = await iterTemp.next();
      } else {
        // On ties the line from file1 goes first.
        emit(headMain.value);
        headMain = await iterMain.next();
      }
    }
    // At most one of the inputs still has lines left; copy them over.
    while (!headMain.done) {
      emit(headMain.value);
      headMain = await iterMain.next();
    }
    while (!headTemp.done) {
      emit(headTemp.value);
      headTemp = await iterTemp.next();
    }
  } finally {
    readMain.close();
    readTemp.close();
    streamMain.destroy();
    streamTemp.destroy();
    fs.closeSync(fd);
  }
  return target;
}
/**
 * Sorts the lines of a file in place using `compare`.
 *
 * The whole file is read into memory, surrounding whitespace is trimmed, the
 * lines are sorted and then written back joined by "\n" (without a trailing
 * newline).
 *
 * @param {string} filePath - Path of the file to sort.
 */
function sortFile(filePath) {
  const contents = fs.readFileSync(filePath, { encoding: "utf8" });
  const rows = contents.trim().split("\n");
  rows.sort((left, right) => compare(left, right));
  const sorted = rows.join("\n");
  fs.writeFileSync(filePath, sorted);
}
// Entry point: split the input into chunk files, sort each chunk in memory,
// then merge the chunks pairwise until a single sorted file remains.
const start = new Date();
console.log("Started at ");
console.log(start);

// Make sure the working directory, the output directory and the first chunk
// folder exist before anything is written.
for (const folder of [
  settings.dir,
  settings.outdir,
  `${settings.dir}/chunk${chunkCounter}`,
]) {
  if (!fs.existsSync(folder)) {
    fs.mkdirSync(folder);
  }
}

await splitFile(settings.filename);
let paths = updatePaths(settings.dir);
for (const p of paths) {
  sortFile(p);
}

// Merge the first two files; the merged file is written next to the second
// one, both inputs are removed and the file list is refreshed.
while (paths.length > 1) {
  await externalMerge(paths[0], paths[1], `${path.parse(paths[1]).dir}/`);
  fs.unlinkSync(paths[0]);
  fs.unlinkSync(paths[1]);
  paths = updatePaths(settings.dir);
}

const sortedPath = `${settings.outdir}/${settings.outfile}`;
if (paths.length === 0) {
  // An empty input yields no chunk files at all; produce an empty result
  // instead of calling renameSync with an undefined source path.
  fs.writeFileSync(sortedPath, "");
} else {
  fs.renameSync(paths[0], sortedPath);
}
fs.rmSync(settings.dir, { recursive: true, force: true });
console.log(`${settings.filename} was successfuly sorted!`);

const end = new Date();
console.log("Finished at");
console.log(end);
console.log("Total elapsed time:");
// Subtracting two Date objects gives the elapsed time in milliseconds.
console.log(end - start);