diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e814b2b3b..510941412 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -64,14 +64,15 @@ jobs: - name: "Install dependencies with Composer" uses: "ramsey/composer-install@2.2.0" with: - composer-options: "--no-suggest" + composer-options: "--no-suggest --working-dir=./benchmark" - name: "Run phpbench" + working-directory: "./benchmark" run: "vendor/bin/phpbench run --report=aggregate --report=bar_chart_time --report=env --output html" - name: Upload HTML report uses: actions/upload-artifact@v3 with: name: phpbench-${{ github.sha }}.html - path: .phpbench/html/index.html + path: ./benchmark/.phpbench/html/index.html retention-days: 3 diff --git a/benchmark/composer.json b/benchmark/composer.json new file mode 100644 index 000000000..f42dc749d --- /dev/null +++ b/benchmark/composer.json @@ -0,0 +1,26 @@ +{ + "name": "mongodb/mongodb-benchmark", + "type": "project", + "repositories": [ + { + "type": "path", + "url": "../", + "symlink": true + } + ], + "require": { + "php": ">=8.1", + "ext-pcntl": "*", + "amphp/parallel-functions": "^1.1", + "mongodb/mongodb": "@dev", + "phpbench/phpbench": "^1.2" + }, + "autoload": { + "psr-4": { + "MongoDB\\Benchmark\\": "src/" + } + }, + "scripts": { + "benchmark": "phpbench run --report=aggregate" + } +} diff --git a/phpbench.json.dist b/benchmark/phpbench.json.dist similarity index 93% rename from phpbench.json.dist rename to benchmark/phpbench.json.dist index 5fd50e4ec..f3acbd04c 100644 --- a/phpbench.json.dist +++ b/benchmark/phpbench.json.dist @@ -4,7 +4,7 @@ "runner.env_enabled_providers": ["mongodb","sampler","git","opcache","php","uname","unix_sysload"], "runner.bootstrap": "vendor/autoload.php", "runner.file_pattern": "*Bench.php", - "runner.path": "benchmark", + "runner.path": "src", "runner.php_config": { "memory_limit": "1G" }, "runner.iterations": 3, "runner.revs": 10 diff --git a/benchmark/BSON/DocumentBench.php b/benchmark/src/BSON/DocumentBench.php similarity index 100% rename from benchmark/BSON/DocumentBench.php rename to benchmark/src/BSON/DocumentBench.php diff --git a/benchmark/BSON/PackedArrayBench.php b/benchmark/src/BSON/PackedArrayBench.php similarity index 100% rename from benchmark/BSON/PackedArrayBench.php rename to benchmark/src/BSON/PackedArrayBench.php diff --git a/benchmark/DriverBench/GridFSBench.php b/benchmark/src/DriverBench/GridFSBench.php similarity index 100% rename from benchmark/DriverBench/GridFSBench.php rename to benchmark/src/DriverBench/GridFSBench.php diff --git a/benchmark/DriverBench/MultiDocBench.php b/benchmark/src/DriverBench/MultiDocBench.php similarity index 100% rename from benchmark/DriverBench/MultiDocBench.php rename to benchmark/src/DriverBench/MultiDocBench.php diff --git a/benchmark/src/DriverBench/ParallelMultiFileImportBench.php b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php new file mode 100644 index 000000000..159462c8e --- /dev/null +++ b/benchmark/src/DriverBench/ParallelMultiFileImportBench.php @@ -0,0 +1,216 @@ +drop(); + $database->createCollection(Utils::getCollectionName()); + } + + /** + * Using Driver's BulkWrite in a single thread + */ + public function benchMultiFileImportBulkWrite(): void + { + foreach (self::getFileNames() as $file) { + self::importFile($file); + } + } + + /** + * Using library's Collection::insertMany in a single thread + */ + public function benchMultiFileImportInsertMany(): void + { + $collection = Utils::getCollection(); + foreach (self::getFileNames() as $file) { + $docs = []; + // Read file contents into BSON documents + $fh = fopen($file, 'r'); + while (($line = fgets($fh)) !== false) { + if ($line !== '') { + $docs[] = Document::fromJSON($line); + } + } + + fclose($fh); + + // Insert documents in bulk + $collection->insertMany($docs); + } + } + + /** + * Using multiple forked threads + * + * @param array{processes:int, files:string[], batchSize:int} $params + */ + #[ParamProviders(['provideProcessesParameter'])] + public function benchMultiFileImportFork(array $params): void + { + $pids = []; + foreach (self::getFileNames() as $file) { + // Wait for a child process to finish if we have reached the maximum number of processes + if (count($pids) >= $params['processes']) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + + $pid = pcntl_fork(); + if ($pid === 0) { + // Reset to ensure that the existing libmongoc client (via the Manager) is not re-used by the child + // process. When the child process constructs a new Manager, the differing PID will result in creation + // of a new libmongoc client. + Utils::reset(); + self::importFile($file); + + // Exit the child process + exit(0); + } + + if ($pid === -1) { + throw new RuntimeException('Failed to fork'); + } + + // Keep the forked process id to wait for it later + $pids[$pid] = true; + } + + // Wait for all child processes to finish + while ($pids !== []) { + $pid = pcntl_waitpid(-1, $status); + unset($pids[$pid]); + } + } + + /** + * Using amphp/parallel-functions with worker pool + * + * @param array{processes:int, files:string[], batchSize:int} $params + */ + #[ParamProviders(['provideProcessesParameter'])] + public function benchMultiFileImportAmp(array $params): void + { + wait(parallelMap( + self::getFileNames(), + // Uses array callable instead of closure to skip complex serialization + [self::class, 'importFile'], + // The pool size is the number of processes + new DefaultPool($params['processes']), + )); + } + + public static function provideProcessesParameter(): Generator + { + yield '1 proc' => ['processes' => 1]; // 100 sequences, to compare to the single thread baseline + yield '2 proc' => ['processes' => 2]; // 50 sequences + yield '4 proc' => ['processes' => 4]; // 25 sequences + yield '8 proc' => ['processes' => 8]; // 13 sequences + yield '13 proc' => ['processes' => 13]; // 8 sequences + yield '20 proc' => ['processes' => 20]; // 5 sequences + yield '34 proc' => ['processes' => 34]; // 3 sequences + } + + /** + * We benchmarked the following solutions to read a file line by line: + * - file + * - SplFileObject + * - fgets + * - stream_get_line 🏆 + */ + public static function importFile(string $file): void + { + $namespace = sprintf('%s.%s', Utils::getDatabaseName(), Utils::getCollectionName()); + + $bulkWrite = new BulkWrite(); + $fh = fopen($file, 'r'); + while (($line = stream_get_line($fh, 10_000, "\n")) !== false) { + $bulkWrite->insert(Document::fromJSON($line)); + } + + fclose($fh); + Utils::getClient()->getManager()->executeBulkWrite($namespace, $bulkWrite); + } + + /** + * Using a method to regenerate the file names because we cannot cache the result of the method in a static + * property. The benchmark runner will call the method in a different process, so the static property will not be + * populated. + */ + private static function getFileNames(): array + { + $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark'; + if (! is_dir($tempDir)) { + mkdir($tempDir); + } + + return array_map( + static fn (int $i) => sprintf('%s/%03d.txt', $tempDir, $i), + range(0, 99), + ); + } +} diff --git a/benchmark/DriverBench/SingleDocBench.php b/benchmark/src/DriverBench/SingleDocBench.php similarity index 100% rename from benchmark/DriverBench/SingleDocBench.php rename to benchmark/src/DriverBench/SingleDocBench.php diff --git a/benchmark/Extension/EnvironmentProvider.php b/benchmark/src/Extension/EnvironmentProvider.php similarity index 100% rename from benchmark/Extension/EnvironmentProvider.php rename to benchmark/src/Extension/EnvironmentProvider.php diff --git a/benchmark/Extension/MongoDBExtension.php b/benchmark/src/Extension/MongoDBExtension.php similarity index 100% rename from benchmark/Extension/MongoDBExtension.php rename to benchmark/src/Extension/MongoDBExtension.php diff --git a/benchmark/Fixtures/Data.php b/benchmark/src/Fixtures/Data.php similarity index 93% rename from benchmark/Fixtures/Data.php rename to benchmark/src/Fixtures/Data.php index a60d3d571..9c21771d4 100644 --- a/benchmark/Fixtures/Data.php +++ b/benchmark/src/Fixtures/Data.php @@ -16,6 +16,7 @@ final class Data public const LARGE_FILE_PATH = __DIR__ . '/data/large_doc.json'; public const SMALL_FILE_PATH = __DIR__ . '/data/small_doc.json'; public const TWEET_FILE_PATH = __DIR__ . '/data/tweet.json'; + public const LDJSON_FILE_PATH = __DIR__ . '/data/ldjson.json'; public static function readJsonFile(string $path): array { diff --git a/benchmark/Fixtures/PassThruCodec.php b/benchmark/src/Fixtures/PassThruCodec.php similarity index 100% rename from benchmark/Fixtures/PassThruCodec.php rename to benchmark/src/Fixtures/PassThruCodec.php diff --git a/benchmark/Fixtures/ToObjectCodec.php b/benchmark/src/Fixtures/ToObjectCodec.php similarity index 100% rename from benchmark/Fixtures/ToObjectCodec.php rename to benchmark/src/Fixtures/ToObjectCodec.php diff --git a/benchmark/Fixtures/data/large_doc.json b/benchmark/src/Fixtures/data/large_doc.json similarity index 100% rename from benchmark/Fixtures/data/large_doc.json rename to benchmark/src/Fixtures/data/large_doc.json diff --git a/benchmark/src/Fixtures/data/ldjson.json b/benchmark/src/Fixtures/data/ldjson.json new file mode 100644 index 000000000..3b1421232 --- /dev/null +++ b/benchmark/src/Fixtures/data/ldjson.json @@ -0,0 +1 @@ +{"text":"@wildfits you're not getting one.....","in_reply_to_status_id":22773233453,"retweet_count":null,"contributors":null,"created_at":"Thu Sep 02 19:38:18 +0000 2010","geo":null,"source":"web","coordinates":null,"in_reply_to_screen_name":"wildfits","truncated":false,"entities":{"user_mentions":[{"indices":[0,9],"screen_name":"wildfits","name":"Mairin Goetzinger","id":41832464}],"urls":[],"hashtags":[]},"retweeted":false,"place":null,"user":{"friends_count":179,"profile_sidebar_fill_color":"7a7a7a","location":"Minneapols, MN/Brookings SD","verified":false,"follow_request_sent":null,"favourites_count":0,"profile_sidebar_border_color":"a3a3a3","profile_image_url":"http://a1.twimg.com/profile_images/1110614677/Screen_shot_2010-08-25_at_10.12.40_AM_normal.png","geo_enabled":false,"created_at":"Sun Aug 17 00:23:13 +0000 2008","description":"graphic designer + foodie, with a love of music, movies, running, design, + the outdoors!","time_zone":"Mountain Time (US & Canada)","url":"http://jessiefarris.com/","screen_name":"jessiekf","notifications":null,"profile_background_color":"303030","listed_count":1,"lang":"en"}} diff --git a/benchmark/Fixtures/data/small_doc.json b/benchmark/src/Fixtures/data/small_doc.json similarity index 100% rename from benchmark/Fixtures/data/small_doc.json rename to benchmark/src/Fixtures/data/small_doc.json diff --git a/benchmark/Fixtures/data/tweet.json b/benchmark/src/Fixtures/data/tweet.json similarity index 100% rename from benchmark/Fixtures/data/tweet.json rename to benchmark/src/Fixtures/data/tweet.json diff --git a/benchmark/ReadLargeDocumentBench.php b/benchmark/src/ReadLargeDocumentBench.php similarity index 100% rename from benchmark/ReadLargeDocumentBench.php rename to benchmark/src/ReadLargeDocumentBench.php diff --git a/benchmark/ReadMultipleDocumentsBench.php b/benchmark/src/ReadMultipleDocumentsBench.php similarity index 100% rename from benchmark/ReadMultipleDocumentsBench.php rename to benchmark/src/ReadMultipleDocumentsBench.php diff --git a/benchmark/Utils.php b/benchmark/src/Utils.php similarity index 79% rename from benchmark/Utils.php rename to benchmark/src/Utils.php index c19221be4..b77699d5c 100644 --- a/benchmark/Utils.php +++ b/benchmark/src/Utils.php @@ -16,7 +16,7 @@ final class Utils public static function getClient(): Client { - return self::$client ??= new Client(self::getUri()); + return self::$client ??= new Client(self::getUri(), [], ['disableClientPersistence' => true]); } public static function getDatabase(): Database @@ -43,4 +43,11 @@ public static function getCollectionName(): string { return 'perftest'; } + + public static function reset(): void + { + self::$client = null; + self::$database = null; + self::$collection = null; + } } diff --git a/composer.json b/composer.json index 5e890b223..6a585b538 100644 --- a/composer.json +++ b/composer.json @@ -21,7 +21,6 @@ }, "require-dev": { "doctrine/coding-standard": "^11.1", - "phpbench/phpbench": "^1.2", "rector/rector": "^0.16.0", "squizlabs/php_codesniffer": "^3.7", "symfony/phpunit-bridge": "^5.2", @@ -33,13 +32,12 @@ }, "autoload-dev": { "psr-4": { - "MongoDB\\Tests\\": "tests/", - "MongoDB\\Benchmark\\": "benchmark/" + "MongoDB\\Tests\\": "tests/" }, "files": [ "tests/PHPUnit/Functions.php" ] }, "scripts": { - "benchmark": "phpbench run --report=aggregate", + "bench": "cd benchmark && composer update && vendor/bin/phpbench run --report=aggregate", "checks": [ "@check:cs", "@check:psalm", diff --git a/phpcs.xml.dist b/phpcs.xml.dist index 71cc6054d..9cee3b66a 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -9,7 +9,7 @@ - benchmark + benchmark/src src docs/examples examples