From 4dc5cd610eef22e128058bf85d6457727064af8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Tue, 19 Sep 2023 22:07:55 +0200 Subject: [PATCH 1/8] PHPLIB-1237 Implement parallel benchmarks --- benchmark/DriverBench/ParallelBench.php | 193 ++++++++++++++++++++++++ benchmark/Fixtures/Data.php | 1 + benchmark/Fixtures/data/ldjson.json | 1 + benchmark/Utils.php | 7 + 4 files changed, 202 insertions(+) create mode 100644 benchmark/DriverBench/ParallelBench.php create mode 100644 benchmark/Fixtures/data/ldjson.json diff --git a/benchmark/DriverBench/ParallelBench.php b/benchmark/DriverBench/ParallelBench.php new file mode 100644 index 000000000..e55af9f0d --- /dev/null +++ b/benchmark/DriverBench/ParallelBench.php @@ -0,0 +1,193 @@ += $params['processes']) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + + $pid = pcntl_fork(); + if ($pid === 0) { + // If we reset, we can garantee that we get a new manager in the child process + // If we don't reset, we will get the same manager client_zval in the child process + // and share the libmongoc client. + Utils::reset(); + $collection = Utils::getCollection(); + + foreach ($files as $file) { + self::importFile($file, $collection); + } + + // Exit the child process + exit(0); + } + + if ($pid === -1) { + throw new RuntimeException('Failed to fork'); + } + + // Keep the forked process id to wait for it later + $pids[$pid] = true; + } + + // Wait for all child processes to finish + while ($pids !== []) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + } + + public static function provideProcessesParameter(): Generator + { + // Max number of forked processes + for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) { + yield $i . 'fork' => ['processes' => $i]; + } + } + + public static function provideMultiFileImportParameters(): Generator + { + $files = self::getFileNames(); + + // Chunk of file names to be handled by each processes + for ($i = 1; $i <= 10; $i += 3) { + yield 'by ' . $i => ['files' => array_chunk($files, $i)]; + } + } + + public function beforeMultiFileImport(): void + { + $database = Utils::getDatabase(); + $database->drop(); + $database->createCollection(Utils::getCollectionName()); + } + + public function afterMultiFileImport(): void + { + foreach (self::$files as $file) { + unlink($file); + } + + unset($this->files); + } + + private static function importFile(string $file, Collection $collection): void + { + // Read file contents into BSON documents + $docs = array_map( + static fn (string $line) => Document::fromJSON($line), + file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT), + ); + // Insert documents in bulk + $collection->insertMany($docs); + } + + private static function getFileNames(): array + { + $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark'; + if (! is_dir($tempDir)) { + mkdir($tempDir); + } + + return array_map( + static fn (int $i) => sprintf('%s/%03d.txt', $tempDir, $i), + //range(0, 99), + range(0, 5), + ); + } +} diff --git a/benchmark/Fixtures/Data.php b/benchmark/Fixtures/Data.php index a60d3d571..9c21771d4 100644 --- a/benchmark/Fixtures/Data.php +++ b/benchmark/Fixtures/Data.php @@ -16,6 +16,7 @@ final class Data public const LARGE_FILE_PATH = __DIR__ . '/data/large_doc.json'; public const SMALL_FILE_PATH = __DIR__ . '/data/small_doc.json'; public const TWEET_FILE_PATH = __DIR__ . '/data/tweet.json'; + public const LDJSON_FILE_PATH = __DIR__ . '/data/ldjson.json'; public static function readJsonFile(string $path): array { diff --git a/benchmark/Fixtures/data/ldjson.json b/benchmark/Fixtures/data/ldjson.json new file mode 100644 index 000000000..3b1421232 --- /dev/null +++ b/benchmark/Fixtures/data/ldjson.json @@ -0,0 +1 @@ +{"text":"@wildfits you're not getting one.....","in_reply_to_status_id":22773233453,"retweet_count":null,"contributors":null,"created_at":"Thu Sep 02 19:38:18 +0000 2010","geo":null,"source":"web","coordinates":null,"in_reply_to_screen_name":"wildfits","truncated":false,"entities":{"user_mentions":[{"indices":[0,9],"screen_name":"wildfits","name":"Mairin Goetzinger","id":41832464}],"urls":[],"hashtags":[]},"retweeted":false,"place":null,"user":{"friends_count":179,"profile_sidebar_fill_color":"7a7a7a","location":"Minneapols, MN/Brookings SD","verified":false,"follow_request_sent":null,"favourites_count":0,"profile_sidebar_border_color":"a3a3a3","profile_image_url":"http://a1.twimg.com/profile_images/1110614677/Screen_shot_2010-08-25_at_10.12.40_AM_normal.png","geo_enabled":false,"created_at":"Sun Aug 17 00:23:13 +0000 2008","description":"graphic designer + foodie, with a love of music, movies, running, design, + the outdoors!","time_zone":"Mountain Time (US & Canada)","url":"http://jessiefarris.com/","screen_name":"jessiekf","notifications":null,"profile_background_color":"303030","listed_count":1,"lang":"en"}} diff --git a/benchmark/Utils.php b/benchmark/Utils.php index c19221be4..43ac051d9 100644 --- a/benchmark/Utils.php +++ b/benchmark/Utils.php @@ -43,4 +43,11 @@ public static function getCollectionName(): string { return 'perftest'; } + + public static function reset(): void + { + self::$client = null; + self::$database = null; + self::$collection = null; + } } From 02d633f955062604a2bf1f67e5dc5e5e47a3234f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Tue, 19 Sep 2023 23:08:43 +0200 Subject: [PATCH 2/8] Add parallel benchmark using AMP async framework --- .github/workflows/benchmark.yml | 5 +- benchmark/composer.json | 26 +++++++++ .../phpbench.json.dist | 2 +- benchmark/{ => src}/BSON/DocumentBench.php | 0 benchmark/{ => src}/BSON/PackedArrayBench.php | 0 .../{ => src}/DriverBench/GridFSBench.php | 0 .../{ => src}/DriverBench/MultiDocBench.php | 0 .../{ => src}/DriverBench/ParallelBench.php | 56 ++++++++++--------- .../{ => src}/DriverBench/SingleDocBench.php | 0 .../Extension/EnvironmentProvider.php | 0 .../{ => src}/Extension/MongoDBExtension.php | 0 benchmark/{ => src}/Fixtures/Data.php | 0 .../{ => src}/Fixtures/PassThruCodec.php | 0 .../{ => src}/Fixtures/ToObjectCodec.php | 0 .../{ => src}/Fixtures/data/large_doc.json | 0 benchmark/{ => src}/Fixtures/data/ldjson.json | 0 .../{ => src}/Fixtures/data/small_doc.json | 0 benchmark/{ => src}/Fixtures/data/tweet.json | 0 .../{ => src}/ReadLargeDocumentBench.php | 0 .../{ => src}/ReadMultipleDocumentsBench.php | 0 benchmark/{ => src}/Utils.php | 0 composer.json | 5 +- phpcs.xml.dist | 2 +- 23 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 benchmark/composer.json rename phpbench.json.dist => benchmark/phpbench.json.dist (93%) rename benchmark/{ => src}/BSON/DocumentBench.php (100%) rename benchmark/{ => src}/BSON/PackedArrayBench.php (100%) rename benchmark/{ => src}/DriverBench/GridFSBench.php (100%) rename benchmark/{ => src}/DriverBench/MultiDocBench.php (100%) rename benchmark/{ => src}/DriverBench/ParallelBench.php (80%) rename benchmark/{ => src}/DriverBench/SingleDocBench.php (100%) rename benchmark/{ => src}/Extension/EnvironmentProvider.php (100%) rename benchmark/{ => src}/Extension/MongoDBExtension.php (100%) rename benchmark/{ => src}/Fixtures/Data.php (100%) rename benchmark/{ => src}/Fixtures/PassThruCodec.php (100%) rename benchmark/{ => src}/Fixtures/ToObjectCodec.php (100%) rename benchmark/{ => src}/Fixtures/data/large_doc.json (100%) rename benchmark/{ => src}/Fixtures/data/ldjson.json (100%) rename benchmark/{ => src}/Fixtures/data/small_doc.json (100%) rename benchmark/{ => src}/Fixtures/data/tweet.json (100%) rename benchmark/{ => src}/ReadLargeDocumentBench.php (100%) rename benchmark/{ => src}/ReadMultipleDocumentsBench.php (100%) rename benchmark/{ => src}/Utils.php (100%) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e814b2b3b..510941412 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -64,14 +64,15 @@ jobs: - name: "Install dependencies with Composer" uses: "ramsey/composer-install@2.2.0" with: - composer-options: "--no-suggest" + composer-options: "--no-suggest --working-dir=./benchmark" - name: "Run phpbench" + working-directory: "./benchmark" run: "vendor/bin/phpbench run --report=aggregate --report=bar_chart_time --report=env --output html" - name: Upload HTML report uses: actions/upload-artifact@v3 with: name: phpbench-${{ github.sha }}.html - path: .phpbench/html/index.html + path: ./benchmark/.phpbench/html/index.html retention-days: 3 diff --git a/benchmark/composer.json b/benchmark/composer.json new file mode 100644 index 000000000..479d5e78c --- /dev/null +++ b/benchmark/composer.json @@ -0,0 +1,26 @@ +{ + "name": "mongodb/mongodb-benchmark", + "type": "project", + "repositories": [ + { + "type": "path", + "url": "../", + "symlink": true + } + ], + "require": { + "php": ">=8.1", + "ext-pcntl": "*", + "amphp/parallel-functions": "^1.1", + "mongodb/mongodb": "@dev", + "phpbench/phpbench": "^1.2" + }, + "autoload": { + "psr-4": { + "MongoDB\\Benchmark\\": "src/" + } + }, + "scripts": { + "benchmark": "phpbench run --report=aggregate" + } +} \ No newline at end of file diff --git a/phpbench.json.dist b/benchmark/phpbench.json.dist similarity index 93% rename from phpbench.json.dist rename to benchmark/phpbench.json.dist index 5fd50e4ec..f3acbd04c 100644 --- a/phpbench.json.dist +++ b/benchmark/phpbench.json.dist @@ -4,7 +4,7 @@ "runner.env_enabled_providers": ["mongodb","sampler","git","opcache","php","uname","unix_sysload"], "runner.bootstrap": "vendor/autoload.php", "runner.file_pattern": "*Bench.php", - "runner.path": "benchmark", + "runner.path": "src", "runner.php_config": { "memory_limit": "1G" }, "runner.iterations": 3, "runner.revs": 10 diff --git a/benchmark/BSON/DocumentBench.php b/benchmark/src/BSON/DocumentBench.php similarity index 100% rename from benchmark/BSON/DocumentBench.php rename to benchmark/src/BSON/DocumentBench.php diff --git a/benchmark/BSON/PackedArrayBench.php b/benchmark/src/BSON/PackedArrayBench.php similarity index 100% rename from benchmark/BSON/PackedArrayBench.php rename to benchmark/src/BSON/PackedArrayBench.php diff --git a/benchmark/DriverBench/GridFSBench.php b/benchmark/src/DriverBench/GridFSBench.php similarity index 100% rename from benchmark/DriverBench/GridFSBench.php rename to benchmark/src/DriverBench/GridFSBench.php diff --git a/benchmark/DriverBench/MultiDocBench.php b/benchmark/src/DriverBench/MultiDocBench.php similarity index 100% rename from benchmark/DriverBench/MultiDocBench.php rename to benchmark/src/DriverBench/MultiDocBench.php diff --git a/benchmark/DriverBench/ParallelBench.php b/benchmark/src/DriverBench/ParallelBench.php similarity index 80% rename from benchmark/DriverBench/ParallelBench.php rename to benchmark/src/DriverBench/ParallelBench.php index e55af9f0d..a6177ba6c 100644 --- a/benchmark/DriverBench/ParallelBench.php +++ b/benchmark/src/DriverBench/ParallelBench.php @@ -2,6 +2,7 @@ namespace MongoDB\Benchmark\DriverBench; +use Amp\Parallel\Worker\DefaultPool; use Generator; use MongoDB\Benchmark\Fixtures\Data; use MongoDB\Benchmark\Utils; @@ -14,7 +15,8 @@ use PhpBench\Attributes\Revs; use RuntimeException; -use function array_chunk; +use function Amp\ParallelFunctions\parallelMap; +use function Amp\Promise\wait; use function array_map; use function ceil; use function count; @@ -68,8 +70,6 @@ public static function afterClass(): void /** * Parallel: LDJSON multi-file import * Using single thread - * - * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import */ #[BeforeMethods('beforeMultiFileImport')] #[Revs(1)] @@ -85,16 +85,15 @@ public function benchMultiFileImport(): void * Parallel: LDJSON multi-file import * Using multiple forked threads * - * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import * @param array{processes:int, files:string[], batchSize:int} $params */ #[BeforeMethods('beforeMultiFileImport')] - #[ParamProviders(['provideProcessesParameter', 'provideMultiFileImportParameters'])] + #[ParamProviders(['provideProcessesParameter'])] #[Revs(1)] public function benchMultiFileImportFork(array $params): void { $pids = []; - foreach ($params['files'] as $files) { + foreach (self::getFileNames() as $file) { // Wait for a child process to finish if we have reached the maximum number of processes if (count($pids) >= $params['processes']) { $pid = pcntl_waitpid(-1, $status); @@ -107,11 +106,7 @@ public function benchMultiFileImportFork(array $params): void // If we don't reset, we will get the same manager client_zval in the child process // and share the libmongoc client. Utils::reset(); - $collection = Utils::getCollection(); - - foreach ($files as $file) { - self::importFile($file, $collection); - } + self::importFile($file, Utils::getCollection()); // Exit the child process exit(0); @@ -132,21 +127,31 @@ public function benchMultiFileImportFork(array $params): void } } - public static function provideProcessesParameter(): Generator + /** + * Parallel: LDJSON multi-file import + * Using amphp/parallel-functions with worker pool + * + * @param array{processes:int, files:string[], batchSize:int} $params + */ + #[BeforeMethods('beforeMultiFileImport')] + #[ParamProviders(['provideProcessesParameter'])] + #[Revs(1)] + public function benchMultiFileImportAmp(array $params): void { - // Max number of forked processes - for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) { - yield $i . 'fork' => ['processes' => $i]; - } + wait(parallelMap( + self::getFileNames(), + // Uses array callable instead of closure to skip complex serialization + [self::class, 'importFile'], + // The pool size is the number of processes + new DefaultPool($params['processes']), + )); } - public static function provideMultiFileImportParameters(): Generator + public static function provideProcessesParameter(): Generator { - $files = self::getFileNames(); - - // Chunk of file names to be handled by each processes - for ($i = 1; $i <= 10; $i += 3) { - yield 'by ' . $i => ['files' => array_chunk($files, $i)]; + // Max number of forked processes + for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) { + yield $i . ' proc' => ['processes' => $i]; } } @@ -166,8 +171,10 @@ public function afterMultiFileImport(): void unset($this->files); } - private static function importFile(string $file, Collection $collection): void + public static function importFile(string $file, ?Collection $collection = null): void { + $collection ??= Utils::getCollection(); + // Read file contents into BSON documents $docs = array_map( static fn (string $line) => Document::fromJSON($line), @@ -186,8 +193,7 @@ private static function getFileNames(): array return array_map( static fn (int $i) => sprintf('%s/%03d.txt', $tempDir, $i), - //range(0, 99), - range(0, 5), + range(0, 99), ); } } diff --git a/benchmark/DriverBench/SingleDocBench.php b/benchmark/src/DriverBench/SingleDocBench.php similarity index 100% rename from benchmark/DriverBench/SingleDocBench.php rename to benchmark/src/DriverBench/SingleDocBench.php diff --git a/benchmark/Extension/EnvironmentProvider.php b/benchmark/src/Extension/EnvironmentProvider.php similarity index 100% rename from benchmark/Extension/EnvironmentProvider.php rename to benchmark/src/Extension/EnvironmentProvider.php diff --git a/benchmark/Extension/MongoDBExtension.php b/benchmark/src/Extension/MongoDBExtension.php similarity index 100% rename from benchmark/Extension/MongoDBExtension.php rename to benchmark/src/Extension/MongoDBExtension.php diff --git a/benchmark/Fixtures/Data.php b/benchmark/src/Fixtures/Data.php similarity index 100% rename from benchmark/Fixtures/Data.php rename to benchmark/src/Fixtures/Data.php diff --git a/benchmark/Fixtures/PassThruCodec.php b/benchmark/src/Fixtures/PassThruCodec.php similarity index 100% rename from benchmark/Fixtures/PassThruCodec.php rename to benchmark/src/Fixtures/PassThruCodec.php diff --git a/benchmark/Fixtures/ToObjectCodec.php b/benchmark/src/Fixtures/ToObjectCodec.php similarity index 100% rename from benchmark/Fixtures/ToObjectCodec.php rename to benchmark/src/Fixtures/ToObjectCodec.php diff --git a/benchmark/Fixtures/data/large_doc.json b/benchmark/src/Fixtures/data/large_doc.json similarity index 100% rename from benchmark/Fixtures/data/large_doc.json rename to benchmark/src/Fixtures/data/large_doc.json diff --git a/benchmark/Fixtures/data/ldjson.json b/benchmark/src/Fixtures/data/ldjson.json similarity index 100% rename from benchmark/Fixtures/data/ldjson.json rename to benchmark/src/Fixtures/data/ldjson.json diff --git a/benchmark/Fixtures/data/small_doc.json b/benchmark/src/Fixtures/data/small_doc.json similarity index 100% rename from benchmark/Fixtures/data/small_doc.json rename to benchmark/src/Fixtures/data/small_doc.json diff --git a/benchmark/Fixtures/data/tweet.json b/benchmark/src/Fixtures/data/tweet.json similarity index 100% rename from benchmark/Fixtures/data/tweet.json rename to benchmark/src/Fixtures/data/tweet.json diff --git a/benchmark/ReadLargeDocumentBench.php b/benchmark/src/ReadLargeDocumentBench.php similarity index 100% rename from benchmark/ReadLargeDocumentBench.php rename to benchmark/src/ReadLargeDocumentBench.php diff --git a/benchmark/ReadMultipleDocumentsBench.php b/benchmark/src/ReadMultipleDocumentsBench.php similarity index 100% rename from benchmark/ReadMultipleDocumentsBench.php rename to benchmark/src/ReadMultipleDocumentsBench.php diff --git a/benchmark/Utils.php b/benchmark/src/Utils.php similarity index 100% rename from benchmark/Utils.php rename to benchmark/src/Utils.php diff --git a/composer.json b/composer.json index 5e890b223..a7153f01f 100644 --- a/composer.json +++ b/composer.json @@ -21,7 +21,6 @@ }, "require-dev": { "doctrine/coding-standard": "^11.1", - "phpbench/phpbench": "^1.2", "rector/rector": "^0.16.0", "squizlabs/php_codesniffer": "^3.7", "symfony/phpunit-bridge": "^5.2", @@ -33,13 +32,11 @@ }, "autoload-dev": { "psr-4": { - "MongoDB\\Tests\\": "tests/", - "MongoDB\\Benchmark\\": "benchmark/" + "MongoDB\\Tests\\": "tests/" }, "files": [ "tests/PHPUnit/Functions.php" ] }, "scripts": { - "benchmark": "phpbench run --report=aggregate", "checks": [ "@check:cs", "@check:psalm", diff --git a/phpcs.xml.dist b/phpcs.xml.dist index 71cc6054d..9cee3b66a 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -9,7 +9,7 @@ - benchmark + benchmark/src src docs/examples examples From 158cf1dbf93ee7f365ab1415df3694ee35b99166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 12:46:18 +0200 Subject: [PATCH 3/8] Add bulkwrite single process benchmark --- benchmark/src/DriverBench/ParallelBench.php | 79 ++++++++++++--------- benchmark/src/Utils.php | 2 +- composer.json | 1 + 3 files changed, 47 insertions(+), 35 deletions(-) diff --git a/benchmark/src/DriverBench/ParallelBench.php b/benchmark/src/DriverBench/ParallelBench.php index a6177ba6c..a590dbdaf 100644 --- a/benchmark/src/DriverBench/ParallelBench.php +++ b/benchmark/src/DriverBench/ParallelBench.php @@ -7,10 +7,11 @@ use MongoDB\Benchmark\Fixtures\Data; use MongoDB\Benchmark\Utils; use MongoDB\BSON\Document; -use MongoDB\Collection; +use MongoDB\Driver\BulkWrite; use PhpBench\Attributes\AfterClassMethods; use PhpBench\Attributes\BeforeClassMethods; use PhpBench\Attributes\BeforeMethods; +use PhpBench\Attributes\Iterations; use PhpBench\Attributes\ParamProviders; use PhpBench\Attributes\Revs; use RuntimeException; @@ -18,7 +19,6 @@ use function Amp\ParallelFunctions\parallelMap; use function Amp\Promise\wait; use function array_map; -use function ceil; use function count; use function file; use function file_get_contents; @@ -46,9 +46,6 @@ #[AfterClassMethods('afterClass')] final class ParallelBench { - /** @var string[] */ - private static array $files = []; - public static function beforeClass(): void { // Generate files @@ -63,21 +60,40 @@ public static function afterClass(): void foreach (self::getFileNames() as $file) { unlink($file); } + } - self::$files = []; + /** + * Parallel: LDJSON multi-file import + * Using Driver's BulkWrite in a single thread + */ + #[BeforeMethods('beforeMultiFileImport')] + #[Revs(1)] + #[Iterations(1)] + public function benchMultiFileImportBulkWrite(): void + { + foreach (self::getFileNames() as $file) { + self::importFile($file); + } } /** * Parallel: LDJSON multi-file import - * Using single thread + * Using library's Collection::insertMany in a single thread */ #[BeforeMethods('beforeMultiFileImport')] #[Revs(1)] - public function benchMultiFileImport(): void + #[Iterations(1)] + public function benchMultiFileImportInsertMany(): void { $collection = Utils::getCollection(); foreach (self::getFileNames() as $file) { - self::importFile($file, $collection); + // Read file contents into BSON documents + $docs = array_map( + static fn (string $line) => Document::fromJSON($line), + file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT), + ); + // Insert documents in bulk + $collection->insertMany($docs); } } @@ -90,6 +106,7 @@ public function benchMultiFileImport(): void #[BeforeMethods('beforeMultiFileImport')] #[ParamProviders(['provideProcessesParameter'])] #[Revs(1)] + #[Iterations(1)] public function benchMultiFileImportFork(array $params): void { $pids = []; @@ -102,11 +119,11 @@ public function benchMultiFileImportFork(array $params): void $pid = pcntl_fork(); if ($pid === 0) { - // If we reset, we can garantee that we get a new manager in the child process - // If we don't reset, we will get the same manager client_zval in the child process - // and share the libmongoc client. + // Reset to ensure that the existing libmongoc client (via the Manager) is not re-used by the child + // process. When the child process constructs a new Manager, the differing PID will result in creation + // of a new libmongoc client. Utils::reset(); - self::importFile($file, Utils::getCollection()); + self::importFile($file); // Exit the child process exit(0); @@ -136,6 +153,7 @@ public function benchMultiFileImportFork(array $params): void #[BeforeMethods('beforeMultiFileImport')] #[ParamProviders(['provideProcessesParameter'])] #[Revs(1)] + #[Iterations(1)] public function benchMultiFileImportAmp(array $params): void { wait(parallelMap( @@ -149,10 +167,13 @@ public function benchMultiFileImportAmp(array $params): void public static function provideProcessesParameter(): Generator { - // Max number of forked processes - for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) { - yield $i . ' proc' => ['processes' => $i]; - } + yield '1 proc' => ['processes' => 1]; // 100 sequences, to compare to the single thread baseline + yield '2 proc' => ['processes' => 2]; // 50 sequences + yield '4 proc' => ['processes' => 4]; // 25 sequences + yield '8 proc' => ['processes' => 8]; // 13 sequences + yield '13 proc' => ['processes' => 13]; // 8 sequences + yield '20 proc' => ['processes' => 20]; // 5 sequences + yield '34 proc' => ['processes' => 34]; // 3 sequences } public function beforeMultiFileImport(): void @@ -162,26 +183,16 @@ public function beforeMultiFileImport(): void $database->createCollection(Utils::getCollectionName()); } - public function afterMultiFileImport(): void + public static function importFile(string $file): void { - foreach (self::$files as $file) { - unlink($file); - } + $namespace = sprintf('%s.%s', Utils::getDatabaseName(), Utils::getCollectionName()); - unset($this->files); - } - - public static function importFile(string $file, ?Collection $collection = null): void - { - $collection ??= Utils::getCollection(); + $bulkWrite = new BulkWrite(); + foreach (file($file, FILE_IGNORE_NEW_LINES | FILE_NO_DEFAULT_CONTEXT) as $line) { + $bulkWrite->insert(Document::fromJSON($line)); + } - // Read file contents into BSON documents - $docs = array_map( - static fn (string $line) => Document::fromJSON($line), - file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT), - ); - // Insert documents in bulk - $collection->insertMany($docs); + Utils::getClient()->getManager()->executeBulkWrite($namespace, $bulkWrite); } private static function getFileNames(): array diff --git a/benchmark/src/Utils.php b/benchmark/src/Utils.php index 43ac051d9..a81ba7d2b 100644 --- a/benchmark/src/Utils.php +++ b/benchmark/src/Utils.php @@ -16,7 +16,7 @@ final class Utils public static function getClient(): Client { - return self::$client ??= new Client(self::getUri()); + return self::$client ??= new Client(self::getUri(), ['disableClientPersistence' => true]); } public static function getDatabase(): Database diff --git a/composer.json b/composer.json index a7153f01f..6a585b538 100644 --- a/composer.json +++ b/composer.json @@ -37,6 +37,7 @@ "files": [ "tests/PHPUnit/Functions.php" ] }, "scripts": { + "bench": "cd benchmark && composer update && vendor/bin/phpbench run --report=aggregate", "checks": [ "@check:cs", "@check:psalm", From f9748878677762ebd4c9320efff1406b957efd74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 16:51:53 +0200 Subject: [PATCH 4/8] Fix disableClientPersistence --- benchmark/src/Utils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/src/Utils.php b/benchmark/src/Utils.php index a81ba7d2b..b77699d5c 100644 --- a/benchmark/src/Utils.php +++ b/benchmark/src/Utils.php @@ -16,7 +16,7 @@ final class Utils public static function getClient(): Client { - return self::$client ??= new Client(self::getUri(), ['disableClientPersistence' => true]); + return self::$client ??= new Client(self::getUri(), [], ['disableClientPersistence' => true]); } public static function getDatabase(): Database From 59515b0d5330f6170b9fdb0dacd58994f67de97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 17:18:08 +0200 Subject: [PATCH 5/8] Use fopen for best performances on reading a file --- benchmark/src/DriverBench/ParallelBench.php | 30 +++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/benchmark/src/DriverBench/ParallelBench.php b/benchmark/src/DriverBench/ParallelBench.php index a590dbdaf..d2fb05891 100644 --- a/benchmark/src/DriverBench/ParallelBench.php +++ b/benchmark/src/DriverBench/ParallelBench.php @@ -20,9 +20,11 @@ use function Amp\Promise\wait; use function array_map; use function count; -use function file; +use function fclose; +use function fgets; use function file_get_contents; use function file_put_contents; +use function fopen; use function is_dir; use function mkdir; use function pcntl_fork; @@ -33,10 +35,6 @@ use function sys_get_temp_dir; use function unlink; -use const FILE_IGNORE_NEW_LINES; -use const FILE_NO_DEFAULT_CONTEXT; -use const FILE_SKIP_EMPTY_LINES; - /** * For accurate results, run benchmarks on a standalone server. * @@ -87,11 +85,17 @@ public function benchMultiFileImportInsertMany(): void { $collection = Utils::getCollection(); foreach (self::getFileNames() as $file) { + $docs = []; // Read file contents into BSON documents - $docs = array_map( - static fn (string $line) => Document::fromJSON($line), - file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT), - ); + $fh = fopen($file, 'r'); + while (($line = fgets($fh)) !== false) { + if ($line !== '') { + $docs[] = Document::fromJSON($line); + } + } + + fclose($fh); + // Insert documents in bulk $collection->insertMany($docs); } @@ -188,10 +192,14 @@ public static function importFile(string $file): void $namespace = sprintf('%s.%s', Utils::getDatabaseName(), Utils::getCollectionName()); $bulkWrite = new BulkWrite(); - foreach (file($file, FILE_IGNORE_NEW_LINES | FILE_NO_DEFAULT_CONTEXT) as $line) { - $bulkWrite->insert(Document::fromJSON($line)); + $fh = fopen($file, 'r'); + while (($line = fgets($fh)) !== false) { + if ($line !== '') { + $bulkWrite->insert(Document::fromJSON($line)); + } } + fclose($fh); Utils::getClient()->getManager()->executeBulkWrite($namespace, $bulkWrite); } From 0f9ce7c639c891f1a22e95150b01a00b96b558b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 17:33:50 +0200 Subject: [PATCH 6/8] Use dedicated class for ParallelMultiFileImportBench --- ...h.php => ParallelMultiFileImportBench.php} | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) rename benchmark/src/DriverBench/{ParallelBench.php => ParallelMultiFileImportBench.php} (91%) diff --git a/benchmark/src/DriverBench/ParallelBench.php b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php similarity index 91% rename from benchmark/src/DriverBench/ParallelBench.php rename to benchmark/src/DriverBench/ParallelMultiFileImportBench.php index d2fb05891..bd3accdb7 100644 --- a/benchmark/src/DriverBench/ParallelBench.php +++ b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php @@ -38,11 +38,14 @@ /** * For accurate results, run benchmarks on a standalone server. * - * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#parallel + * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import */ #[BeforeClassMethods('beforeClass')] #[AfterClassMethods('afterClass')] -final class ParallelBench +#[BeforeMethods('beforeIteration')] +#[Revs(1)] +#[Iterations(1)] +final class ParallelMultiFileImportBench { public static function beforeClass(): void { @@ -60,13 +63,17 @@ public static function afterClass(): void } } + public function beforeIteration(): void + { + $database = Utils::getDatabase(); + $database->drop(); + $database->createCollection(Utils::getCollectionName()); + } + /** * Parallel: LDJSON multi-file import * Using Driver's BulkWrite in a single thread */ - #[BeforeMethods('beforeMultiFileImport')] - #[Revs(1)] - #[Iterations(1)] public function benchMultiFileImportBulkWrite(): void { foreach (self::getFileNames() as $file) { @@ -75,12 +82,8 @@ public function benchMultiFileImportBulkWrite(): void } /** - * Parallel: LDJSON multi-file import * Using library's Collection::insertMany in a single thread */ - #[BeforeMethods('beforeMultiFileImport')] - #[Revs(1)] - #[Iterations(1)] public function benchMultiFileImportInsertMany(): void { $collection = Utils::getCollection(); @@ -102,15 +105,11 @@ public function benchMultiFileImportInsertMany(): void } /** - * Parallel: LDJSON multi-file import * Using multiple forked threads * * @param array{processes:int, files:string[], batchSize:int} $params */ - #[BeforeMethods('beforeMultiFileImport')] #[ParamProviders(['provideProcessesParameter'])] - #[Revs(1)] - #[Iterations(1)] public function benchMultiFileImportFork(array $params): void { $pids = []; @@ -149,15 +148,11 @@ public function benchMultiFileImportFork(array $params): void } /** - * Parallel: LDJSON multi-file import * Using amphp/parallel-functions with worker pool * * @param array{processes:int, files:string[], batchSize:int} $params */ - #[BeforeMethods('beforeMultiFileImport')] #[ParamProviders(['provideProcessesParameter'])] - #[Revs(1)] - #[Iterations(1)] public function benchMultiFileImportAmp(array $params): void { wait(parallelMap( @@ -180,13 +175,6 @@ public static function provideProcessesParameter(): Generator yield '34 proc' => ['processes' => 34]; // 3 sequences } - public function beforeMultiFileImport(): void - { - $database = Utils::getDatabase(); - $database->drop(); - $database->createCollection(Utils::getCollectionName()); - } - public static function importFile(string $file): void { $namespace = sprintf('%s.%s', Utils::getDatabaseName(), Utils::getCollectionName()); From 7310d97a672eff72c0609ff246849d138bb48119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 18:46:33 +0200 Subject: [PATCH 7/8] Use stream_get_line --- .../DriverBench/ParallelMultiFileImportBench.php | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmark/src/DriverBench/ParallelMultiFileImportBench.php b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php index bd3accdb7..74e7de3c4 100644 --- a/benchmark/src/DriverBench/ParallelMultiFileImportBench.php +++ b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php @@ -32,6 +32,7 @@ use function range; use function sprintf; use function str_repeat; +use function stream_get_line; use function sys_get_temp_dir; use function unlink; @@ -43,8 +44,8 @@ #[BeforeClassMethods('beforeClass')] #[AfterClassMethods('afterClass')] #[BeforeMethods('beforeIteration')] -#[Revs(1)] #[Iterations(1)] +#[Revs(1)] final class ParallelMultiFileImportBench { public static function beforeClass(): void @@ -175,16 +176,21 @@ public static function provideProcessesParameter(): Generator yield '34 proc' => ['processes' => 34]; // 3 sequences } + /** + * We benchmarked the following solutions to read a file line by line: + * - file + * - SplFileObject + * - fgets + * - stream_get_line 🏆 + */ public static function importFile(string $file): void { $namespace = sprintf('%s.%s', Utils::getDatabaseName(), Utils::getCollectionName()); $bulkWrite = new BulkWrite(); $fh = fopen($file, 'r'); - while (($line = fgets($fh)) !== false) { - if ($line !== '') { - $bulkWrite->insert(Document::fromJSON($line)); - } + while (($line = stream_get_line($fh, 10_000, "\n")) !== false) { + $bulkWrite->insert(Document::fromJSON($line)); } fclose($fh); From b06ae9db66c679eedd598ac28cd81f56f42432c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 20 Sep 2023 19:23:32 +0200 Subject: [PATCH 8/8] Cleanups --- benchmark/composer.json | 2 +- benchmark/src/DriverBench/ParallelMultiFileImportBench.php | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/benchmark/composer.json b/benchmark/composer.json index 479d5e78c..f42dc749d 100644 --- a/benchmark/composer.json +++ b/benchmark/composer.json @@ -23,4 +23,4 @@ "scripts": { "benchmark": "phpbench run --report=aggregate" } -} \ No newline at end of file +} diff --git a/benchmark/src/DriverBench/ParallelMultiFileImportBench.php b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php index 74e7de3c4..159462c8e 100644 --- a/benchmark/src/DriverBench/ParallelMultiFileImportBench.php +++ b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php @@ -72,7 +72,6 @@ public function beforeIteration(): void } /** - * Parallel: LDJSON multi-file import * Using Driver's BulkWrite in a single thread */ public function benchMultiFileImportBulkWrite(): void @@ -197,6 +196,11 @@ public static function importFile(string $file): void Utils::getClient()->getManager()->executeBulkWrite($namespace, $bulkWrite); } + /** + * Using a method to regenerate the file names because we cannot cache the result of the method in a static + * property. The benchmark runner will call the method in a different process, so the static property will not be + * populated. + */ private static function getFileNames(): array { $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark';