From 528f01975b52dec31d0bf4c40ed9c219186d857b Mon Sep 17 00:00:00 2001 From: Erica Fischer Date: Mon, 2 Oct 2023 16:52:27 -0700 Subject: [PATCH] Try again to make it not run out of memory --- main.cpp | 4 +- sort.cpp | 115 ++++++++++++++++++++++++++++--------------------------- sort.hpp | 1 - 3 files changed, 61 insertions(+), 59 deletions(-) diff --git a/main.cpp b/main.cpp index 5376e77ce..2c303a99e 100644 --- a/main.cpp +++ b/main.cpp @@ -2041,7 +2041,7 @@ std::pair read_input(std::vector &sources, char *fname, i vertex_readers.push_back(readers[i].vertexfile); rewind(readers[i].vertexfile); } - fqsort(vertex_readers, sizeof(vertex), vertexcmp, vertex_out, memsize / 4); + fqsort(vertex_readers, sizeof(vertex), vertexcmp, vertex_out, memsize / 10); for (size_t i = 0; i < CPUS; i++) { if (fclose(readers[i].vertexfile) != 0) { @@ -2107,7 +2107,7 @@ std::pair read_input(std::vector &sources, char *fname, i rewind(readers[i].nodefile); } - fqsort(node_readers, sizeof(node), nodecmp, node_out, memsize / 4); + fqsort(node_readers, sizeof(node), nodecmp, node_out, memsize / 10); for (size_t i = 0; i < CPUS; i++) { if (fclose(readers[i].nodefile) != 0) { diff --git a/sort.cpp b/sort.cpp index 058b6563e..4edb4959c 100644 --- a/sort.cpp +++ b/sort.cpp @@ -7,77 +7,84 @@ #define MAX_MEMORY (10 * 1024 * 1024) void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out, size_t mem) { - // read some elements into memory to choose a pivot from + std::string pivot; + FILE *fp1, *fp2; - std::string buf; - bool read_everything = false; - for (size_t i = 0; i < inputs.size(); i++) { - if (buf.size() > mem) { - break; - } + { + // read some elements into memory to choose a pivot from + // + // this is in its own scope so `buf` can go out of scope + // before trying to do any sub-sorts. 
- while (true) { - std::string element; - element.resize(width); + std::string buf; - size_t n = fread((void *) element.c_str(), width, 1, inputs[i]); - if (n == 0) { - if (i + 1 == inputs.size()) { - read_everything = true; - } + bool read_everything = false; + for (size_t i = 0; i < inputs.size(); i++) { + if (buf.size() > mem) { break; } - buf.append(element); + while (true) { + std::string element; + element.resize(width); - if (buf.size() > mem) { - break; + size_t n = fread((void *) element.c_str(), width, 1, inputs[i]); + if (n == 0) { + if (i + 1 == inputs.size()) { + read_everything = true; + } + break; + } + + buf.append(element); + + if (buf.size() > mem) { + break; + } } } - } - qsort((void *) buf.c_str(), buf.size() / width, width, cmp); + qsort((void *) buf.c_str(), buf.size() / width, width, cmp); - // If that was everything we have to sort, we are done. + // If that was everything we have to sort, we are done. - if (read_everything) { - fwrite((void *) buf.c_str(), buf.size() / width, width, out); - return; - } + if (read_everything) { + fwrite((void *) buf.c_str(), buf.size() / width, width, out); + return; + } - // Otherwise, choose a pivot from it, make some temporary files, - // write what we have to those files, and then partition the rest - // of the input into them. + // Otherwise, choose a pivot from it, make some temporary files, + // write what we have to those files, and then partition the rest + // of the input into them. - // This would be unstable if the pivot is one of several elements - // that compare equal. Does it matter? + // This would be unstable if the pivot is one of several elements + // that compare equal. Does it matter? 
- size_t pivot_off = width * (buf.size() / width / 2); - std::string pivot(buf, pivot_off, width); + size_t pivot_off = width * (buf.size() / width / 2); + pivot = std::string(buf, pivot_off, width); - std::string t1 = "/tmp/sort1.XXXXXX"; - std::string t2 = "/tmp/sort2.XXXXXX"; + std::string t1 = "/tmp/sort1.XXXXXX"; + std::string t2 = "/tmp/sort2.XXXXXX"; - int fd1 = mkstemp((char *) t1.c_str()); - unlink(t1.c_str()); - int fd2 = mkstemp((char *) t2.c_str()); - unlink(t2.c_str()); + int fd1 = mkstemp((char *) t1.c_str()); + unlink(t1.c_str()); + int fd2 = mkstemp((char *) t2.c_str()); + unlink(t2.c_str()); - FILE *fp1 = fdopen(fd1, "w+b"); - if (fp1 == NULL) { - perror(t1.c_str()); - exit(EXIT_FAILURE); - } - FILE *fp2 = fdopen(fd2, "w+b"); - if (fp2 == NULL) { - perror(t2.c_str()); - exit(EXIT_FAILURE); - } + fp1 = fdopen(fd1, "w+b"); + if (fp1 == NULL) { + perror(t1.c_str()); + exit(EXIT_FAILURE); + } + fp2 = fdopen(fd2, "w+b"); + if (fp2 == NULL) { + perror(t2.c_str()); + exit(EXIT_FAILURE); + } - fwrite((void *) buf.c_str(), sizeof(char), pivot_off, fp1); - fwrite((void *) ((char *) buf.c_str() + pivot_off), sizeof(char), buf.size() - pivot_off, fp2); - buf.clear(); - buf.reserve(0); + fwrite((void *) buf.c_str(), sizeof(char), pivot_off, fp1); + fwrite((void *) ((char *) buf.c_str() + pivot_off), sizeof(char), buf.size() - pivot_off, fp2); + } // read the remaining input into the temporary files @@ -114,7 +121,3 @@ void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, fqsort(v2, width, cmp, out, mem); fclose(fp2); } - -void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out) { - fqsort(inputs, width, cmp, out, MAX_MEMORY); -} diff --git a/sort.hpp b/sort.hpp index 40b53b3fa..cfae2cf9e 100644 --- a/sort.hpp +++ b/sort.hpp @@ -1,7 +1,6 @@ #ifndef SORT_HPP #define SORT_HPP -void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out); void fqsort(std::vector<FILE *> &inputs, 
size_t width, int (*cmp)(const void *, const void *), FILE *out, size_t mem); #endif