From 4dc5cd610eef22e128058bf85d6457727064af8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Tue, 19 Sep 2023 22:07:55 +0200 Subject: [PATCH] PHPLIB-1237 Implement parallel benchmarks --- benchmark/DriverBench/ParallelBench.php | 193 ++++++++++++++++++++++++ benchmark/Fixtures/Data.php | 1 + benchmark/Fixtures/data/ldjson.json | 1 + benchmark/Utils.php | 7 + 4 files changed, 202 insertions(+) create mode 100644 benchmark/DriverBench/ParallelBench.php create mode 100644 benchmark/Fixtures/data/ldjson.json diff --git a/benchmark/DriverBench/ParallelBench.php b/benchmark/DriverBench/ParallelBench.php new file mode 100644 index 000000000..e55af9f0d --- /dev/null +++ b/benchmark/DriverBench/ParallelBench.php @@ -0,0 +1,193 @@ += $params['processes']) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + + $pid = pcntl_fork(); + if ($pid === 0) { + // If we reset, we can guarantee that we get a new manager in the child process + // If we don't reset, we will get the same manager client_zval in the child process + // and share the libmongoc client. + Utils::reset(); + $collection = Utils::getCollection(); + + foreach ($files as $file) { + self::importFile($file, $collection); + } + + // Exit the child process + exit(0); + } + + if ($pid === -1) { + throw new RuntimeException('Failed to fork'); + } + + // Keep the forked process id to wait for it later + $pids[$pid] = true; + } + + // Wait for all child processes to finish + while ($pids !== []) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + } + + public static function provideProcessesParameter(): Generator + { + // Max number of forked processes + for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) { + yield $i . 
'fork' => ['processes' => $i]; + } + } + + public static function provideMultiFileImportParameters(): Generator + { + $files = self::getFileNames(); + + // Chunk of file names to be handled by each process + for ($i = 1; $i <= 10; $i += 3) { + yield 'by ' . $i => ['files' => array_chunk($files, $i)]; + } + } + + public function beforeMultiFileImport(): void + { + $database = Utils::getDatabase(); + $database->drop(); + $database->createCollection(Utils::getCollectionName()); + } + + public function afterMultiFileImport(): void + { + foreach (self::$files as $file) { + unlink($file); + } + + unset($this->files); + } + + private static function importFile(string $file, Collection $collection): void + { + // Read file contents into BSON documents + $docs = array_map( + static fn (string $line) => Document::fromJSON($line), + file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT), + ); + // Insert documents in bulk + $collection->insertMany($docs); + } + + private static function getFileNames(): array + { + $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark'; + if (! is_dir($tempDir)) { + mkdir($tempDir); + } + + return array_map( + static fn (int $i) => sprintf('%s/%03d.txt', $tempDir, $i), + //range(0, 99), + range(0, 5), + ); + } +} diff --git a/benchmark/Fixtures/Data.php b/benchmark/Fixtures/Data.php index a60d3d571..9c21771d4 100644 --- a/benchmark/Fixtures/Data.php +++ b/benchmark/Fixtures/Data.php @@ -16,6 +16,7 @@ final class Data public const LARGE_FILE_PATH = __DIR__ . '/data/large_doc.json'; public const SMALL_FILE_PATH = __DIR__ . '/data/small_doc.json'; public const TWEET_FILE_PATH = __DIR__ . '/data/tweet.json'; + public const LDJSON_FILE_PATH = __DIR__ . 
'/data/ldjson.json'; public static function readJsonFile(string $path): array { diff --git a/benchmark/Fixtures/data/ldjson.json b/benchmark/Fixtures/data/ldjson.json new file mode 100644 index 000000000..3b1421232 --- /dev/null +++ b/benchmark/Fixtures/data/ldjson.json @@ -0,0 +1 @@ +{"text":"@wildfits you're not getting one.....","in_reply_to_status_id":22773233453,"retweet_count":null,"contributors":null,"created_at":"Thu Sep 02 19:38:18 +0000 2010","geo":null,"source":"web","coordinates":null,"in_reply_to_screen_name":"wildfits","truncated":false,"entities":{"user_mentions":[{"indices":[0,9],"screen_name":"wildfits","name":"Mairin Goetzinger","id":41832464}],"urls":[],"hashtags":[]},"retweeted":false,"place":null,"user":{"friends_count":179,"profile_sidebar_fill_color":"7a7a7a","location":"Minneapols, MN/Brookings SD","verified":false,"follow_request_sent":null,"favourites_count":0,"profile_sidebar_border_color":"a3a3a3","profile_image_url":"http://a1.twimg.com/profile_images/1110614677/Screen_shot_2010-08-25_at_10.12.40_AM_normal.png","geo_enabled":false,"created_at":"Sun Aug 17 00:23:13 +0000 2008","description":"graphic designer + foodie, with a love of music, movies, running, design, + the outdoors!","time_zone":"Mountain Time (US & Canada)","url":"http://jessiefarris.com/","screen_name":"jessiekf","notifications":null,"profile_background_color":"303030","listed_count":1,"lang":"en"}} diff --git a/benchmark/Utils.php b/benchmark/Utils.php index c19221be4..43ac051d9 100644 --- a/benchmark/Utils.php +++ b/benchmark/Utils.php @@ -43,4 +43,11 @@ public static function getCollectionName(): string { return 'perftest'; } + + public static function reset(): void + { + self::$client = null; + self::$database = null; + self::$collection = null; + } }