Skip to content

Commit

Permalink
PHPLIB-1237 Implement parallel benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
GromNaN committed Sep 19, 2023
1 parent ec6c431 commit 4dc5cd6
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 0 deletions.
193 changes: 193 additions & 0 deletions benchmark/DriverBench/ParallelBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
<?php

namespace MongoDB\Benchmark\DriverBench;

use Generator;
use MongoDB\Benchmark\Fixtures\Data;
use MongoDB\Benchmark\Utils;
use MongoDB\BSON\Document;
use MongoDB\Collection;
use PhpBench\Attributes\AfterClassMethods;
use PhpBench\Attributes\BeforeClassMethods;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\ParamProviders;
use PhpBench\Attributes\Revs;
use RuntimeException;
use Throwable;

use function array_chunk;
use function array_map;
use function ceil;
use function count;
use function file;
use function file_get_contents;
use function file_put_contents;
use function fwrite;
use function is_dir;
use function mkdir;
use function pcntl_fork;
use function pcntl_waitpid;
use function pcntl_wexitstatus;
use function pcntl_wifexited;
use function range;
use function rtrim;
use function sprintf;
use function str_repeat;
use function sys_get_temp_dir;
use function unlink;

use const FILE_IGNORE_NEW_LINES;
use const FILE_NO_DEFAULT_CONTEXT;
use const FILE_SKIP_EMPTY_LINES;
use const STDERR;

/**
 * Benchmarks importing several LDJSON files, sequentially and with forked processes.
 *
 * The fixture files are generated by beforeClass() and removed by afterClass().
 * For accurate results, run benchmarks on a standalone server.
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#parallel
 */
#[BeforeClassMethods('beforeClass')]
#[AfterClassMethods('afterClass')]
final class ParallelBench
{
    /**
     * Number of LDJSON files that are generated and imported.
     *
     * NOTE(review): the previous implementation kept `range(0, 99)` (100 files)
     * commented out next to the active `range(0, 5)`. The reduced data set is
     * preserved here; confirm the intended size against the specification.
     */
    private const FILE_COUNT = 6;

    /** Number of times the single-document fixture is repeated in each generated file. */
    private const DOCUMENTS_PER_FILE = 5_000;

    /** @var string[] */
    private static array $files = [];

    /**
     * Creates the temporary directory and writes the LDJSON fixture files.
     *
     * @throws RuntimeException if the directory or a file cannot be created, or the fixture cannot be read
     */
    public static function beforeClass(): void
    {
        $tempDir = self::getTempDir();

        // The second is_dir() covers the case where another process created the directory in the meantime
        if (! is_dir($tempDir) && ! mkdir($tempDir) && ! is_dir($tempDir)) {
            throw new RuntimeException(sprintf('Failed to create directory "%s"', $tempDir));
        }

        $document = file_get_contents(Data::LDJSON_FILE_PATH);
        if ($document === false) {
            throw new RuntimeException(sprintf('Failed to read "%s"', Data::LDJSON_FILE_PATH));
        }

        // importFile() expects one JSON document per line, so make sure every
        // repetition ends with exactly one line break before concatenating.
        $fileContents = str_repeat(rtrim($document) . "\n", self::DOCUMENTS_PER_FILE);

        foreach (self::getFileNames() as $file) {
            if (file_put_contents($file, $fileContents) === false) {
                throw new RuntimeException(sprintf('Failed to write "%s"', $file));
            }
        }
    }

    /**
     * Removes the fixture files written by beforeClass().
     */
    public static function afterClass(): void
    {
        foreach (self::getFileNames() as $file) {
            unlink($file);
        }

        self::$files = [];
    }

    /**
     * Parallel: LDJSON multi-file import
     * Using a single process
     *
     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
     */
    #[BeforeMethods('beforeMultiFileImport')]
    #[Revs(1)]
    public function benchMultiFileImport(): void
    {
        $collection = Utils::getCollection();
        foreach (self::getFileNames() as $file) {
            self::importFile($file, $collection);
        }
    }

    /**
     * Parallel: LDJSON multi-file import
     * Using multiple forked processes
     *
     * Each entry of $params['files'] is a chunk of file names that is imported
     * by one child process; at most $params['processes'] children run at once.
     *
     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
     * @param array{processes:int, files:string[][]} $params
     *
     * @throws RuntimeException if forking or waiting fails, or if a child process did not exit successfully
     */
    #[BeforeMethods('beforeMultiFileImport')]
    #[ParamProviders(['provideProcessesParameter', 'provideMultiFileImportParameters'])]
    #[Revs(1)]
    public function benchMultiFileImportFork(array $params): void
    {
        $pids = [];
        $childFailed = false;

        foreach ($params['files'] as $files) {
            // Wait for a child process to finish if we have reached the maximum number of processes
            if (count($pids) >= $params['processes']) {
                [$finished, $succeeded] = self::waitForChild();
                unset($pids[$finished]);
                $childFailed = $childFailed || ! $succeeded;
            }

            $pid = pcntl_fork();
            if ($pid === -1) {
                throw new RuntimeException('Failed to fork');
            }

            if ($pid === 0) {
                // Child process. It must never return into the caller: any
                // failure is reported through the exit code instead.
                try {
                    // If we reset, we can guarantee that we get a new manager in the child process
                    // If we don't reset, we will get the same manager client_zval in the child process
                    // and share the libmongoc client.
                    Utils::reset();
                    $collection = Utils::getCollection();

                    foreach ($files as $file) {
                        self::importFile($file, $collection);
                    }
                } catch (Throwable $e) {
                    fwrite(STDERR, sprintf("Import failed in child process: %s\n", $e->getMessage()));

                    exit(1);
                }

                // Exit the child process
                exit(0);
            }

            // Keep the forked process id to wait for it later
            $pids[$pid] = true;
        }

        // Wait for all child processes to finish
        while ($pids !== []) {
            [$finished, $succeeded] = self::waitForChild();
            unset($pids[$finished]);
            $childFailed = $childFailed || ! $succeeded;
        }

        // Reported only after every child was reaped so that none is left behind
        if ($childFailed) {
            throw new RuntimeException('At least one child process failed to import its files');
        }
    }

    /**
     * Provides the maximum number of concurrently running child processes.
     *
     * The value grows by roughly 25% per step: 1, 2, 3, 4, 5, 7, 9, 12, 15, 19, 24, 30.
     */
    public static function provideProcessesParameter(): Generator
    {
        // Max number of forked processes
        for ($i = 1; $i <= 30; $i = (int) ceil($i * 1.25)) {
            yield $i . 'fork' => ['processes' => $i];
        }
    }

    /**
     * Provides the fixture file names split into chunks of 1, 4, 7 and 10 files.
     */
    public static function provideMultiFileImportParameters(): Generator
    {
        $files = self::getFileNames();

        // Chunk of file names to be handled by each process
        for ($i = 1; $i <= 10; $i += 3) {
            yield 'by ' . $i => ['files' => array_chunk($files, $i)];
        }
    }

    /**
     * Drops the benchmark database and recreates the empty target collection.
     */
    public function beforeMultiFileImport(): void
    {
        $database = Utils::getDatabase();
        $database->drop();
        $database->createCollection(Utils::getCollectionName());
    }

    /**
     * Removes the files listed in self::$files and clears the list.
     *
     * No attribute in this class registers this method, and nothing in this
     * class adds entries to self::$files.
     */
    public function afterMultiFileImport(): void
    {
        foreach (self::$files as $file) {
            unlink($file);
        }

        // $files is a static property: it must be reset through self::, not unset on $this
        self::$files = [];
    }

    /**
     * Reads an LDJSON file and inserts all its documents with a single insertMany().
     *
     * @throws RuntimeException if the file cannot be read
     */
    private static function importFile(string $file, Collection $collection): void
    {
        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT);
        if ($lines === false) {
            throw new RuntimeException(sprintf('Failed to read "%s"', $file));
        }

        // Read file contents into BSON documents
        $docs = array_map(
            static fn (string $line): Document => Document::fromJSON($line),
            $lines,
        );

        // Insert documents in bulk
        $collection->insertMany($docs);
    }

    /**
     * Blocks until any child process terminates.
     *
     * @return array{0:int, 1:bool} Process id of the child and whether it exited normally with code 0
     *
     * @throws RuntimeException if there is no child to wait for or waiting fails
     */
    private static function waitForChild(): array
    {
        $pid = pcntl_waitpid(-1, $status);

        // Without this check a failing wait would keep the calling loop spinning forever
        if ($pid <= 0) {
            throw new RuntimeException('Failed to wait for a child process');
        }

        return [$pid, pcntl_wifexited($status) && pcntl_wexitstatus($status) === 0];
    }

    /**
     * Returns the directory that holds the generated fixture files, without creating it.
     */
    private static function getTempDir(): string
    {
        return sys_get_temp_dir() . '/mongodb-php-benchmark';
    }

    /**
     * Returns the paths of the fixture files (000.txt, 001.txt, ...).
     *
     * @return string[]
     */
    private static function getFileNames(): array
    {
        $tempDir = self::getTempDir();

        return array_map(
            static fn (int $i): string => sprintf('%s/%03d.txt', $tempDir, $i),
            range(0, self::FILE_COUNT - 1),
        );
    }
}
1 change: 1 addition & 0 deletions benchmark/Fixtures/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ final class Data
public const LARGE_FILE_PATH = __DIR__ . '/data/large_doc.json';
public const SMALL_FILE_PATH = __DIR__ . '/data/small_doc.json';
public const TWEET_FILE_PATH = __DIR__ . '/data/tweet.json';
public const LDJSON_FILE_PATH = __DIR__ . '/data/ldjson.json';

public static function readJsonFile(string $path): array
{
Expand Down
1 change: 1 addition & 0 deletions benchmark/Fixtures/data/ldjson.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"text":"@wildfits you're not getting one.....","in_reply_to_status_id":22773233453,"retweet_count":null,"contributors":null,"created_at":"Thu Sep 02 19:38:18 +0000 2010","geo":null,"source":"web","coordinates":null,"in_reply_to_screen_name":"wildfits","truncated":false,"entities":{"user_mentions":[{"indices":[0,9],"screen_name":"wildfits","name":"Mairin Goetzinger","id":41832464}],"urls":[],"hashtags":[]},"retweeted":false,"place":null,"user":{"friends_count":179,"profile_sidebar_fill_color":"7a7a7a","location":"Minneapols, MN/Brookings SD","verified":false,"follow_request_sent":null,"favourites_count":0,"profile_sidebar_border_color":"a3a3a3","profile_image_url":"http://a1.twimg.com/profile_images/1110614677/Screen_shot_2010-08-25_at_10.12.40_AM_normal.png","geo_enabled":false,"created_at":"Sun Aug 17 00:23:13 +0000 2008","description":"graphic designer + foodie, with a love of music, movies, running, design, + the outdoors!","time_zone":"Mountain Time (US & Canada)","url":"http://jessiefarris.com/","screen_name":"jessiekf","notifications":null,"profile_background_color":"303030","listed_count":1,"lang":"en"}}
7 changes: 7 additions & 0 deletions benchmark/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,11 @@ public static function getCollectionName(): string
{
return 'perftest';
}

/**
 * Clears the statically stored client, database and collection by setting
 * all three properties to null.
 *
 * NOTE(review): presumably the getters create fresh instances on their next
 * call after a reset (the fork benchmark relies on this) — confirm against
 * the getter implementations.
 */
public static function reset(): void
{
    self::$collection = self::$database = self::$client = null;
}
}

0 comments on commit 4dc5cd6

Please sign in to comment.