PHPLIB-1187: Run benchmark on Evergreen #1185

Merged 5 commits on Oct 30, 2023
13 changes: 13 additions & 0 deletions .evergreen/config/functions.yml
@@ -482,3 +482,16 @@ functions:
binary: bash
args:
- .evergreen/compile-extension.sh

# Run benchmarks. The filter skips the benchAmpWorkers subjects as they fail due to socket exceptions
"run benchmark":
- command: shell.exec
type: test
params:
working_dir: "src/benchmark"
script: |
${PREPARE_SHELL}
export PATH="${PHP_PATH}/bin:$PATH"

php ../composer.phar install --no-suggest
vendor/bin/phpbench run --report=env --report=evergreen --report=aggregate --output html --filter='bench(?!AmpWorkers)'
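For reference, phpbench's --filter value is a regular expression matched against benchmark subject names, so the negative lookahead above keeps every bench* subject except the benchAmpWorkers ones. A quick sanity check of that pattern in plain PHP (illustration only, not part of the PR):

<?php

$pattern = '/bench(?!AmpWorkers)/';

var_dump(preg_match($pattern, 'benchSequential')); // int(1): kept
var_dump(preg_match($pattern, 'benchFork'));       // int(1): kept
var_dump(preg_match($pattern, 'benchAmpWorkers')); // int(0): skipped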
1 change: 1 addition & 0 deletions .evergreen/config/php.ini
@@ -1 +1,2 @@
extension=mongodb.so
memory_limit=-1
Member:

You should increase the limit. Otherwise, we won't know how much memory was used if it's too much. What is the memory limit of the job runner?

Member Author:

For some reason it aborted when it hit a previous 128M limit, apparently ignoring the 1G limit defined in the runner config. We can also report memory usage from the benchmarks using perf.send, which would be a better indicator than failing the build when it hits some limit. Want me to add those numbers?

Member:

perf.send is too late if the process crashes, no?

Member Author:

Correct, perf.send would not be executed in that case. With -1 the memory would be unlimited, so the only case in which it would crash is if it used up all memory including the page file, which I'd consider highly unlikely.
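(Side note, hedged: phpbench can record peak memory per iteration, and a benchmark process can also surface the figure itself. A minimal illustration of the kind of number that could be fed to perf.send; this is not code from the PR:)

<?php

// Illustration only: report peak memory after a run, so a crash-free
// benchmark still yields a usable memory figure even with memory_limit=-1.
$peakBytes = memory_get_peak_usage(true); // "real" allocated memory
printf("peak memory: %.1f MiB\n", $peakBytes / (1024 ** 2));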

12 changes: 12 additions & 0 deletions .evergreen/config/test-tasks.yml
@@ -20,3 +20,15 @@ tasks:
commands:
- func: "bootstrap mongohoused"
- func: "run atlas data lake test"

- name: "run-benchmark"
exec_timeout_secs: 3600
commands:
- func: "bootstrap mongo-orchestration"
vars:
TOPOLOGY: "server"
MONGODB_VERSION: "v6.0-perf"
- func: "run benchmark"
- command: perf.send
params:
file: src/benchmark/.phpbench/results.json
15 changes: 15 additions & 0 deletions .evergreen/config/test-variants.yml
@@ -114,3 +114,18 @@ buildvariants:
tasks:
- "test_atlas_task_group"
- ".csfle"

# Run benchmarks
- name: benchmark-rhel90
tags: ["benchmark", "rhel", "x64"]
display_name: "Benchmark: RHEL 9.0, MongoDB 6.0"
run_on: rhel90-dbx-perf-large
expansions:
FETCH_BUILD_VARIANT: "build-rhel90"
FETCH_BUILD_TASK: "build-php-8.2"
PHP_VERSION: "8.2"
depends_on:
- variant: "build-rhel90"
name: "build-php-8.2"
tasks:
- "run-benchmark"
3 changes: 1 addition & 2 deletions benchmark/phpbench.json.dist
@@ -6,6 +6,5 @@
"runner.file_pattern": "*Bench.php",
"runner.path": "src",
"runner.php_config": { "memory_limit": "1G" },
"runner.iterations": 3,
"runner.revs": 10
Member Author:

Removed this in favour of increasing revs only for benchmarks that run in the microsecond range. For anything that takes milliseconds, the precision is usually good enough with a single rev.

"runner.iterations": 3
}
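With the global runner.revs gone, revs are pinned per class via phpbench's #[Revs] attribute on the microsecond-range BSON benchmarks (see the DocumentBench and PackedArrayBench hunks below). A minimal sketch of the pattern, with a made-up class name:

<?php

use PhpBench\Attributes\Revs;

// Each iteration executes every subject 10 times and reports the mean,
// smoothing out timer noise for microsecond-range operations.
#[Revs(10)]
final class FastOperationBench
{
    public function benchDecode(): void
    {
        // ... microsecond-range operation under test ...
    }
}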
2 changes: 2 additions & 0 deletions benchmark/src/BSON/DocumentBench.php
@@ -5,13 +5,15 @@
use MongoDB\Benchmark\Fixtures\Data;
use MongoDB\BSON\Document;
use PhpBench\Attributes\BeforeMethods;
+ use PhpBench\Attributes\Revs;
use PhpBench\Attributes\Warmup;
use stdClass;

use function file_get_contents;
use function iterator_to_array;

#[BeforeMethods('prepareData')]
+ #[Revs(10)]
#[Warmup(1)]
final class DocumentBench
{
2 changes: 2 additions & 0 deletions benchmark/src/BSON/PackedArrayBench.php
@@ -5,12 +5,14 @@
use MongoDB\Benchmark\Fixtures\Data;
use MongoDB\BSON\PackedArray;
use PhpBench\Attributes\BeforeMethods;
+ use PhpBench\Attributes\Revs;
use PhpBench\Attributes\Warmup;

use function array_values;
use function iterator_to_array;

#[BeforeMethods('prepareData')]
+ #[Revs(10)]
#[Warmup(1)]
final class PackedArrayBench
{
36 changes: 15 additions & 21 deletions benchmark/src/DriverBench/ParallelMultiFileExportBench.php
@@ -15,7 +15,6 @@
use PhpBench\Attributes\BeforeClassMethods;
use PhpBench\Attributes\Iterations;
use PhpBench\Attributes\ParamProviders;
- use PhpBench\Attributes\Revs;
use RuntimeException;

use function array_chunk;
@@ -44,7 +43,6 @@
#[AfterClassMethods('afterClass')]
#[AfterMethods('afterIteration')]
#[Iterations(1)]
- #[Revs(1)]
final class ParallelMultiFileExportBench
{
public static function beforeClass(): void
Expand Down Expand Up @@ -74,15 +72,15 @@ public function afterIteration(): void
* Using a single thread to export multiple files.
* By executing a single Find command for multiple files, we can reduce the number of roundtrips to the server.
*
- * @param array{chunk:int} $params
+ * @param array{chunkSize:int} $params
Member Author:

Decided to rename this since I got confused as to what chunk meant: I thought chunk: 1 meant a single chunk of files, while it actually meant 100 chunks of 1 file each (see the short array_chunk illustration after this hunk).

*/
#[ParamProviders(['provideChunkParams'])]
public function benchSequential(array $params): void
{
- foreach (array_chunk(self::getFileNames(), $params['chunk']) as $i => $files) {
+ foreach (array_chunk(self::getFileNames(), $params['chunkSize']) as $i => $files) {
self::exportFile($files, [], [
- 'limit' => 5_000 * $params['chunk'],
- 'skip' => 5_000 * $params['chunk'] * $i,
+ 'limit' => 5_000 * $params['chunkSize'],
+ 'skip' => 5_000 * $params['chunkSize'] * $i,
]);
}
}
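(To make the renamed parameter concrete: array_chunk splits the 100 file names into chunks of chunkSize elements, so chunkSize => 1 yields 100 chunks of one file each. A short illustration, not part of the diff:)

<?php

$files = range(1, 100);                   // stand-in for the 100 file names
var_dump(count(array_chunk($files, 1)));  // int(100): 100 chunks of 1 file
var_dump(count(array_chunk($files, 4)));  // int(25):  25 chunks of 4 files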
@@ -103,12 +101,12 @@ public function benchFork(array $params): void
Utils::reset();

// Create a child process for each chunk of files
- foreach (array_chunk(self::getFileNames(), $params['chunk']) as $i => $files) {
+ foreach (array_chunk(self::getFileNames(), $params['chunkSize']) as $i => $files) {
$pid = pcntl_fork();
if ($pid === 0) {
self::exportFile($files, [], [
- 'limit' => 5_000 * $params['chunk'],
- 'skip' => 5_000 * $params['chunk'] * $i,
+ 'limit' => 5_000 * $params['chunkSize'],
+ 'skip' => 5_000 * $params['chunkSize'] * $i,
]);

// Exit the child process
@@ -133,21 +131,21 @@ public function benchFork(array $params): void
/**
* Using amphp/parallel with worker pool
*
- * @param array{chunk:int} $params
+ * @param array{chunkSize:int} $params
*/
#[ParamProviders(['provideChunkParams'])]
public function benchAmpWorkers(array $params): void
{
- $workerPool = new ContextWorkerPool(ceil(100 / $params['chunk']), new ContextWorkerFactory());
+ $workerPool = new ContextWorkerPool(ceil(100 / $params['chunkSize']), new ContextWorkerFactory());

$futures = [];
- foreach (array_chunk(self::getFileNames(), $params['chunk']) as $i => $files) {
+ foreach (array_chunk(self::getFileNames(), $params['chunkSize']) as $i => $files) {
$futures[] = $workerPool->submit(
new ExportFileTask(
files: $files,
options: [
- 'limit' => 5_000 * $params['chunk'],
- 'skip' => 5_000 * $params['chunk'] * $i,
+ 'limit' => 5_000 * $params['chunkSize'],
+ 'skip' => 5_000 * $params['chunkSize'] * $i,
],
),
)->getFuture();
@@ -160,13 +158,9 @@ public function benchAmpWorkers(array $params): void

public static function provideChunkParams(): Generator
{
- yield 'by 1' => ['chunk' => 1];
- yield 'by 2' => ['chunk' => 2];
- yield 'by 4' => ['chunk' => 4];
- yield 'by 8' => ['chunk' => 8];
- yield 'by 13' => ['chunk' => 13];
- yield 'by 20' => ['chunk' => 20];
- yield 'by 100' => ['chunk' => 100];
+ yield '100 chunks' => ['chunkSize' => 1];
+ yield '25 chunks' => ['chunkSize' => 4];
+ yield '10 chunks' => ['chunkSize' => 10];
}

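The benchFork subjects here and in ParallelMultiFileImportBench share one pattern: fork a child per chunk, do the work in the child, and have the parent wait on every PID. A stripped-down sketch of that pattern; doWork() is a placeholder for the real export/import helpers, and this is not the PR's exact code:

<?php

// Minimal fork/join skeleton, as used by the benchFork subjects.
$chunks = array_chunk(range(1, 100), 10);

$pids = [];
foreach ($chunks as $chunk) {
    $pid = pcntl_fork();
    if ($pid === -1) {
        throw new RuntimeException('Failed to fork');
    }

    if ($pid === 0) {
        doWork($chunk); // child: process its share of the files...
        exit(0);        // ...then exit so it never re-enters the parent loop
    }

    $pids[] = $pid;
}

// Parent: the iteration is only done once every child has exited.
foreach ($pids as $pid) {
    pcntl_waitpid($pid, $status);
}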
36 changes: 8 additions & 28 deletions benchmark/src/DriverBench/ParallelMultiFileImportBench.php
@@ -16,7 +16,6 @@
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\Iterations;
use PhpBench\Attributes\ParamProviders;
- use PhpBench\Attributes\Revs;
use RuntimeException;

use function array_chunk;
@@ -47,7 +46,6 @@
#[AfterClassMethods('afterClass')]
#[BeforeMethods('beforeIteration')]
#[Iterations(1)]
- #[Revs(1)]
final class ParallelMultiFileImportBench
{
public static function beforeClass(): void
@@ -73,20 +71,6 @@ public function beforeIteration(): void
$database->createCollection(Utils::getCollectionName());
}

- /**
-  * Using Driver's BulkWrite in a single thread.
-  * The number of files to import in each iteration is controlled by the "chunk" parameter.
-  *
-  * @param array{chunk:int} $params
-  */
- #[ParamProviders(['provideChunkParams'])]
- public function benchBulkWrite(array $params): void
Member Author:

This subject essentially tests the same thing as benchInsertMany, so I decided to skip it in order to shave ~6 minutes off the run time.

Member:

We could keep this one and remove the other, which is a little less efficient.

Member Author:

The reason I removed bulkWrite and kept insertMany is that benchmarking the latter also exposes performance regressions introduced in the library, while the other only tests the extension.

- {
-     foreach (array_chunk(self::getFileNames(), $params['chunk']) as $files) {
-         self::importFile($files);
-     }
- }
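(For readers following the thread, a hedged sketch of the two code paths being compared; this is not taken from the diff. BulkWrite exercises only the extension, while Collection::insertMany also goes through the library's option handling and BSON type mapping, so library-level regressions only show up in the latter:)

<?php

$manager = new MongoDB\Driver\Manager('mongodb://127.0.0.1');
$collection = (new MongoDB\Client('mongodb://127.0.0.1'))->selectCollection('test', 'coll');
$documents = [['x' => 1], ['x' => 2]];

// Extension-level path: MongoDB\Driver\BulkWrite talks to libmongoc directly.
$bulk = new MongoDB\Driver\BulkWrite();
foreach ($documents as $document) {
    $bulk->insert($document);
}
$manager->executeBulkWrite('test.coll', $bulk);

// Library-level path: Collection::insertMany wraps the same primitive, adding
// the library's option handling and type mapping on top.
$collection->insertMany($documents);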

/**
* Using library's Collection::insertMany in a single thread
*/
@@ -116,7 +100,7 @@ public function benchInsertMany(): void
* Using multiple forked threads. The number of threads is controlled by the "chunk" parameter,
* which is the number of files to import in each thread.
*
- * @param array{chunk:int} $params
+ * @param array{chunkSize:int} $params
*/
#[ParamProviders(['provideChunkParams'])]
public function benchFork(array $params): void
@@ -128,7 +112,7 @@ public function benchFork(array $params): void
// of a new libmongoc client.
Utils::reset();

- foreach (array_chunk(self::getFileNames(), $params['chunk']) as $files) {
+ foreach (array_chunk(self::getFileNames(), $params['chunkSize']) as $files) {
$pid = pcntl_fork();
if ($pid === 0) {
self::importFile($files);
@@ -155,16 +139,16 @@ public function benchFork(array $params): void
/**
* Using amphp/parallel with worker pool
*
- * @param array{processes:int} $params
+ * @param array{chunkSize:int} $params
*/
#[ParamProviders(['provideChunkParams'])]
public function benchAmpWorkers(array $params): void
{
- $workerPool = new ContextWorkerPool(ceil(100 / $params['chunk']), new ContextWorkerFactory());
+ $workerPool = new ContextWorkerPool(ceil(100 / $params['chunkSize']), new ContextWorkerFactory());

$futures = array_map(
fn ($files) => $workerPool->submit(new ImportFileTask($files))->getFuture(),
- array_chunk(self::getFileNames(), $params['chunk']),
+ array_chunk(self::getFileNames(), $params['chunkSize']),
);

foreach (Future::iterate($futures) as $future) {
@@ -176,13 +160,9 @@

public function provideChunkParams(): Generator
{
- yield 'by 1' => ['chunk' => 1];
- yield 'by 2' => ['chunk' => 2];
- yield 'by 4' => ['chunk' => 4];
- yield 'by 8' => ['chunk' => 8];
- yield 'by 13' => ['chunk' => 13];
- yield 'by 20' => ['chunk' => 20];
- yield 'by 100' => ['chunk' => 100];
+ yield '100 chunks' => ['chunkSize' => 1];
+ yield '25 chunks' => ['chunkSize' => 4];
+ yield '10 chunks' => ['chunkSize' => 10];
}

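The ImportFileTask/ExportFileTask classes submitted to the worker pool belong to the benchmark harness and are not part of this diff; roughly, such a task looks like the sketch below (assuming amphp/parallel v2's Task interface; the details are guessed):

<?php

use Amp\Cancellation;
use Amp\Parallel\Worker\Task;
use Amp\Sync\Channel;

// Hypothetical shape of the task the pool executes; the real class lives in
// the benchmark harness and is not shown in this PR.
final class ImportFileTask implements Task
{
    public function __construct(private readonly array $files)
    {
    }

    public function run(Channel $channel, Cancellation $cancellation): mixed
    {
        // Runs inside a worker process: each worker has its own MongoDB
        // client, so no connection state is shared between workers.
        foreach ($this->files as $file) {
            // ... import one file into the collection ...
        }

        return count($this->files);
    }
}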
3 changes: 0 additions & 3 deletions benchmark/src/DriverBench/SingleDocBench.php
@@ -9,7 +9,6 @@
use MongoDB\Driver\Command;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\ParamProviders;
- use PhpBench\Attributes\Revs;

use function array_map;
use function file_get_contents;
@@ -45,7 +44,6 @@
*/
#[BeforeMethods('beforeFindOneById')]
#[ParamProviders('provideFindOneByIdParams')]
- #[Revs(1)]
public function benchFindOneById(array $params): void
{
$collection = Utils::getCollection();
@@ -79,7 +77,6 @@
* @param array{document: object|array, repeat: int, options?: array} $params
*/
#[ParamProviders('provideInsertOneParams')]
- #[Revs(1)]
public function benchInsertOne(array $params): void
{
$collection = Utils::getCollection();