From 002386e57f70e2a2c8af302b679baf10d25de203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?=
Date: Fri, 22 Sep 2023 10:49:00 +0200
Subject: [PATCH] PHPLIB-1237 Implement GridFS multi-file upload & download (#1170)

Parallel Benchmarks specs: GridFS upload & download
https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#gridfs-upload

Single implementation using one fork for each file uploaded/downloaded.
---
 .../src/DriverBench/ParallelGridFSBench.php | 142 ++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 benchmark/src/DriverBench/ParallelGridFSBench.php

diff --git a/benchmark/src/DriverBench/ParallelGridFSBench.php b/benchmark/src/DriverBench/ParallelGridFSBench.php
new file mode 100644
index 000000000..ba0dc8713
--- /dev/null
+++ b/benchmark/src/DriverBench/ParallelGridFSBench.php
@@ -0,0 +1,142 @@
+drop();
+
+        foreach (self::getFileNames() as $file) {
+            unlink($file);
+        }
+    }
+
+    /**
+     * GridFS multi-file upload
+     *
+     * Uploads every benchmark file into the default GridFS bucket under its
+     * base name, using one forked child process per file, and returns once
+     * all children have been reaped.
+     *
+     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#gridfs-multi-file-upload
+     *
+     * @throws RuntimeException if a fork fails or a child does not exit with status 0
+     */
+    #[BeforeMethods('beforeUpload')]
+    public function benchUpload(): void
+    {
+        self::forEachFileInParallel(static function (string $file): void {
+            Utils::getDatabase()->selectGridFSBucket()->uploadFromStream(basename($file), fopen($file, 'r'));
+        });
+    }
+
+    /**
+     * Prepares the upload benchmark: writes every benchmark file to disk,
+     * drops the database, uploads a one-unit 'init' file to the bucket and
+     * finally calls Utils::reset().
+     */
+    public function beforeUpload(): void
+    {
+        foreach (self::getFileNames() as $file) {
+            // NOTE(review): Data::getStream() is not visible here; the argument
+            // is presumably a size in bytes (5 MiB) - confirm.
+            stream_copy_to_stream(Data::getStream(5 * 1024 * 1024), fopen($file, 'w'));
+        }
+
+        $database = Utils::getDatabase();
+        $database->drop();
+
+        $bucket = $database->selectGridFSBucket();
+        $bucket->uploadFromStream('init', Data::getStream(1));
+
+        // NOTE(review): presumably discards cached state so that each forked
+        // child obtains its own database handle - confirm against Utils.
+        Utils::reset();
+    }
+
+    /**
+     * GridFS multi-file download
+     *
+     * Downloads every benchmark file from the default GridFS bucket by name
+     * and writes it back to its local path, using one forked child process
+     * per file, and returns once all children have been reaped.
+     *
+     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#gridfs-multi-file-download
+     *
+     * @throws RuntimeException if a fork fails or a child does not exit with status 0
+     */
+    #[BeforeMethods('beforeDownload')]
+    public function benchDownload(): void
+    {
+        self::forEachFileInParallel(static function (string $file): void {
+            $stream = Utils::getDatabase()
+                ->selectGridFSBucket()
+                ->openDownloadStreamByName(basename($file));
+            stream_copy_to_stream($stream, fopen($file, 'w'));
+        });
+    }
+
+    /**
+     * Prepares the download benchmark by running the upload preparation and
+     * the upload itself, so the bucket contains every file to download.
+     */
+    public function beforeDownload(): void
+    {
+        // Initialize the GridFS bucket with the files
+        $this->beforeUpload();
+        $this->benchUpload();
+    }
+
+    /**
+     * Runs $task once per benchmark file, each call in its own forked child
+     * process, and blocks until every started child has been reaped.
+     *
+     * @param callable $task Invoked in the child as $task(string $file)
+     *
+     * @throws RuntimeException if a fork fails, or if any child cannot be
+     *                          waited for or does not exit with status 0
+     */
+    private static function forEachFileInParallel(callable $task): void
+    {
+        $pids = [];
+        $forkFailed = false;
+
+        foreach (self::getFileNames() as $file) {
+            $pid = pcntl_fork();
+            if ($pid === 0) {
+                // Child process: it must always terminate here and never
+                // unwind into the caller's stack, so failures become a
+                // non-zero exit status that the parent checks below.
+                try {
+                    $task($file);
+                } catch (\Throwable $e) {
+                    fwrite(STDERR, $e->getMessage() . PHP_EOL);
+                    exit(1);
+                }
+
+                // Exit the child process
+                exit(0);
+            }
+
+            if ($pid === -1) {
+                // Stop forking, but still reap the children already started
+                // before reporting the failure.
+                $forkFailed = true;
+                break;
+            }
+
+            // Keep the forked process id to wait for it later
+            $pids[] = $pid;
+        }
+
+        // Wait for each child by its own pid. Waiting on -1 and ignoring the
+        // return value would spin forever once pcntl_waitpid() reports an
+        // error, because the pid list could then never become empty.
+        $failures = 0;
+        foreach ($pids as $pid) {
+            do {
+                $reaped = pcntl_waitpid($pid, $status);
+            } while ($reaped === -1 && pcntl_get_last_error() === PCNTL_EINTR);
+
+            if ($reaped !== $pid || ! pcntl_wifexited($status) || pcntl_wexitstatus($status) !== 0) {
+                $failures++;
+            }
+        }
+
+        if ($forkFailed) {
+            throw new RuntimeException('Failed to fork');
+        }
+
+        if ($failures > 0) {
+            throw new RuntimeException(sprintf('%d child process(es) did not exit successfully', $failures));
+        }
+    }
+
+    /**
+     * Returns the paths of the 50 benchmark files (file00.txt to file49.txt)
+     * inside the "mongodb-php-benchmark" directory of the system temporary
+     * directory, creating that directory when it does not exist yet.
+     *
+     * @return string[]
+     *
+     * @throws RuntimeException if the directory cannot be created
+     */
+    private static function getFileNames(): array
+    {
+        $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark';
+        // The second is_dir() check tolerates another process creating the
+        // directory between the first check and mkdir().
+        if (! is_dir($tempDir) && ! mkdir($tempDir) && ! is_dir($tempDir)) {
+            throw new RuntimeException(sprintf('Failed to create directory "%s"', $tempDir));
+        }
+
+        return array_map(
+            static fn (int $i) => sprintf('%s/file%02d.txt', $tempDir, $i),
+            range(0, 49),
+        );
+    }
+}