Skip to content

Commit

Permalink
Try again to make it not run out of memory
Browse files: browse the repository at this point in the history
  • Loading branch information
e-n-f committed Oct 2, 2023
1 parent 4aba720 commit 528f019
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 59 deletions.
4 changes: 2 additions & 2 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2041,7 +2041,7 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
vertex_readers.push_back(readers[i].vertexfile);
rewind(readers[i].vertexfile);
}
fqsort(vertex_readers, sizeof(vertex), vertexcmp, vertex_out, memsize / 4);
fqsort(vertex_readers, sizeof(vertex), vertexcmp, vertex_out, memsize / 10);

for (size_t i = 0; i < CPUS; i++) {
if (fclose(readers[i].vertexfile) != 0) {
Expand Down Expand Up @@ -2107,7 +2107,7 @@ std::pair<int, metadata> read_input(std::vector<source> &sources, char *fname, i
rewind(readers[i].nodefile);
}

fqsort(node_readers, sizeof(node), nodecmp, node_out, memsize / 4);
fqsort(node_readers, sizeof(node), nodecmp, node_out, memsize / 10);

for (size_t i = 0; i < CPUS; i++) {
if (fclose(readers[i].nodefile) != 0) {
Expand Down
115 changes: 59 additions & 56 deletions sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,77 +7,84 @@
#define MAX_MEMORY (10 * 1024 * 1024)

void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out, size_t mem) {
// read some elements into memory to choose a pivot from
std::string pivot;
FILE *fp1, *fp2;

std::string buf;
bool read_everything = false;
for (size_t i = 0; i < inputs.size(); i++) {
if (buf.size() > mem) {
break;
}
{
// read some elements into memory to choose a pivot from
//
// this is in its own scope so `buf` can go out of scope
// before trying to do any sub-sorts.

while (true) {
std::string element;
element.resize(width);
std::string buf;

size_t n = fread((void *) element.c_str(), width, 1, inputs[i]);
if (n == 0) {
if (i + 1 == inputs.size()) {
read_everything = true;
}
bool read_everything = false;
for (size_t i = 0; i < inputs.size(); i++) {
if (buf.size() > mem) {
break;
}

buf.append(element);
while (true) {
std::string element;
element.resize(width);

if (buf.size() > mem) {
break;
size_t n = fread((void *) element.c_str(), width, 1, inputs[i]);
if (n == 0) {
if (i + 1 == inputs.size()) {
read_everything = true;
}
break;
}

buf.append(element);

if (buf.size() > mem) {
break;
}
}
}
}

qsort((void *) buf.c_str(), buf.size() / width, width, cmp);
qsort((void *) buf.c_str(), buf.size() / width, width, cmp);

// If that was everything we have to sort, we are done.
// If that was everything we have to sort, we are done.

if (read_everything) {
fwrite((void *) buf.c_str(), buf.size() / width, width, out);
return;
}
if (read_everything) {
fwrite((void *) buf.c_str(), buf.size() / width, width, out);
return;
}

// Otherwise, choose a pivot from it, make some temporary files,
// write what we have to those files, and then partition the rest
// of the input into them.
// Otherwise, choose a pivot from it, make some temporary files,
// write what we have to those files, and then partition the rest
// of the input into them.

// This would be unstable if the pivot is one of several elements
// that compare equal. Does it matter?
// This would be unstable if the pivot is one of several elements
// that compare equal. Does it matter?

size_t pivot_off = width * (buf.size() / width / 2);
std::string pivot(buf, pivot_off, width);
size_t pivot_off = width * (buf.size() / width / 2);
pivot = std::string(buf, pivot_off, width);

std::string t1 = "/tmp/sort1.XXXXXX";
std::string t2 = "/tmp/sort2.XXXXXX";
std::string t1 = "/tmp/sort1.XXXXXX";
std::string t2 = "/tmp/sort2.XXXXXX";

int fd1 = mkstemp((char *) t1.c_str());
unlink(t1.c_str());
int fd2 = mkstemp((char *) t2.c_str());
unlink(t2.c_str());
int fd1 = mkstemp((char *) t1.c_str());
unlink(t1.c_str());
int fd2 = mkstemp((char *) t2.c_str());
unlink(t2.c_str());

FILE *fp1 = fdopen(fd1, "w+b");
if (fp1 == NULL) {
perror(t1.c_str());
exit(EXIT_FAILURE);
}
FILE *fp2 = fdopen(fd2, "w+b");
if (fp2 == NULL) {
perror(t2.c_str());
exit(EXIT_FAILURE);
}
fp1 = fdopen(fd1, "w+b");
if (fp1 == NULL) {
perror(t1.c_str());
exit(EXIT_FAILURE);
}
fp2 = fdopen(fd2, "w+b");
if (fp2 == NULL) {
perror(t2.c_str());
exit(EXIT_FAILURE);
}

fwrite((void *) buf.c_str(), sizeof(char), pivot_off, fp1);
fwrite((void *) ((char *) buf.c_str() + pivot_off), sizeof(char), buf.size() - pivot_off, fp2);
buf.clear();
buf.reserve(0);
fwrite((void *) buf.c_str(), sizeof(char), pivot_off, fp1);
fwrite((void *) ((char *) buf.c_str() + pivot_off), sizeof(char), buf.size() - pivot_off, fp2);
}

// read the remaining input into the temporary files

Expand Down Expand Up @@ -114,7 +121,3 @@ void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *,
fqsort(v2, width, cmp, out, mem);
fclose(fp2);
}

void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out) {
fqsort(inputs, width, cmp, out, MAX_MEMORY);
}
1 change: 0 additions & 1 deletion sort.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef SORT_HPP
#define SORT_HPP

void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out);
void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out, size_t mem);

#endif

0 comments on commit 528f019

Please sign in to comment.