Skip to content

Commit

Permalink
Full sync: Set chunk size of comments dynamically (#41350)
Browse files Browse the repository at this point in the history
* Move filter_objects_and_metadata_by_size to module class and use it by posts and comments

* changelog

* Fixed tests

* changelog

* ID for comments is not an int so let's cast before comparing

* Added tests for comments

* Id field for posts is ID

* Added tests to cover filter_objects_and_metadata_by_size

* Replace deprecated method

* Typo in docblocks
  • Loading branch information
darssen authored Jan 30, 2025
1 parent a19dccb commit d705b87
Show file tree
Hide file tree
Showing 8 changed files with 404 additions and 65 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: minor
Type: added

Sync: Full Sync comments now send dynamic chunks if chunk size default is too big
36 changes: 33 additions & 3 deletions projects/packages/sync/src/modules/class-comments.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,25 @@
* Class to handle sync for comments.
*/
class Comments extends Module {

/**
* Max bytes allowed for full sync upload.
* Current Setting : 7MB.
*
* @access public
*
* @var int
*/
const MAX_SIZE_FULL_SYNC = 7000000;
/**
* Max bytes allowed for post meta_value => length.
* Current Setting : 2MB.
*
* @access public
*
* @var int
*/
const MAX_COMMENT_META_LENGTH = 2000000;
/**
* Sync module name.
*
Expand Down Expand Up @@ -573,10 +592,21 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
}
// Get the comment IDs from the comments that were fetched.
$fetched_comment_ids = wp_list_pluck( $comments, 'comment_ID' );
$metadata = $this->get_metadata( $fetched_comment_ids, 'comment', Settings::get_setting( 'comment_meta_whitelist' ) );

// Filter the comments and metadata based on the maximum size constraints.
list( $filtered_comment_ids, $filtered_comments, $filtered_comments_metadata ) = $this->filter_objects_and_metadata_by_size(
'comment',
$comments,
$metadata,
self::MAX_COMMENT_META_LENGTH, // Replace with appropriate comment meta length constant.
self::MAX_SIZE_FULL_SYNC
);

return array(
'object_ids' => $comment_ids, // Still send the original comment IDs since we need them to update the status.
'objects' => $comments,
'meta' => $this->get_metadata( $fetched_comment_ids, 'comment', Settings::get_setting( 'comment_meta_whitelist' ) ),
'object_ids' => $filtered_comment_ids,
'objects' => $filtered_comments,
'meta' => $filtered_comments_metadata,
);
}

Expand Down
58 changes: 58 additions & 0 deletions projects/packages/sync/src/modules/class-module.php
Original file line number Diff line number Diff line change
Expand Up @@ -691,4 +691,62 @@ public function total( $config ) {
public function get_where_sql( $config ) { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
return '1=1';
}

/**
* Filters objects and metadata based on maximum size constraints.
* It always allows the first object with its metadata, even if they exceed the limit.
*
* @access public
*
* @param string $type The type of objects to filter (e.g., 'post' or 'comment').
* @param array $objects The array of objects to filter (e.g., posts or comments).
* @param array $metadata The array of metadata to filter.
* @param int $max_meta_size Maximum size for individual objects.
* @param int $max_total_size Maximum combined size for objects and metadata.
* @return array An array containing the filtered object IDs, filtered objects, and filtered metadata.
*/
public function filter_objects_and_metadata_by_size( $type, $objects, $metadata, $max_meta_size, $max_total_size ) {
$filtered_objects = array();
$filtered_metadata = array();
$filtered_object_ids = array();
$current_size = 0;

foreach ( $objects as $object ) {
$object_size = strlen( maybe_serialize( $object ) );
$current_metadata = array();
$metadata_size = 0;

foreach ( $metadata as $key => $metadata_item ) {
if ( (int) $metadata_item->{$type . '_id'} === (int) $object->{$this->id_field()} ) {
$metadata_item_size = strlen( maybe_serialize( $metadata_item->meta_value ) );
if ( $metadata_item_size >= $max_meta_size ) {
$metadata_item->meta_value = ''; // Trim metadata if too large.
}
$current_metadata[] = $metadata_item;
$metadata_size += $metadata_item_size >= $max_meta_size ? 0 : $metadata_item_size;

if ( ! empty( $filtered_object_ids ) && ( $current_size + $object_size + $metadata_size ) > $max_total_size ) {
break 2; // Exit both loops.
}
unset( $metadata[ $key ] );
}
}

// Always allow the first object with metadata.
if ( empty( $filtered_object_ids ) || ( $current_size + $object_size + $metadata_size ) <= $max_total_size ) {
$filtered_object_ids[] = strval( $object->{$this->id_field()} );
$filtered_objects[] = $object;
$filtered_metadata = array_merge( $filtered_metadata, $current_metadata );
$current_size += $object_size + $metadata_size;
} else {
break;
}
}

return array(
$filtered_object_ids,
$filtered_objects,
$filtered_metadata,
);
}
}
77 changes: 22 additions & 55 deletions projects/packages/sync/src/modules/class-posts.php
Original file line number Diff line number Diff line change
Expand Up @@ -875,11 +875,29 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
return array();
}

$posts = $this->expand_posts( $post_ids );
$posts_metadata = $this->get_metadata( $post_ids, 'post', Settings::get_setting( 'post_meta_whitelist' ) );
$posts = $this->expand_posts( $post_ids );

// If no posts were fetched, make sure to return the expected structure so that status is updated correctly.
if ( empty( $posts ) ) {
return array(
'object_ids' => $post_ids,
'objects' => array(),
'meta' => array(),
);
}
// Get the post IDs from the posts that were fetched.
$fetched_post_ids = wp_list_pluck( $posts, 'ID' );
$metadata = $this->get_metadata( $fetched_post_ids, 'post', Settings::get_setting( 'post_meta_whitelist' ) );

// Filter the posts and metadata based on the maximum size constraints.
list( $filtered_post_ids, $filtered_posts, $filtered_posts_metadata ) = $this->filter_objects_and_metadata_by_size(
'post',
$posts,
$metadata,
self::MAX_POST_META_LENGTH,
self::MAX_SIZE_FULL_SYNC
);

// Filter posts and metadata based on maximum size constraints.
list( $filtered_post_ids, $filtered_posts, $filtered_posts_metadata ) = $this->filter_posts_and_metadata_max_size( $posts, $posts_metadata );
return array(
'object_ids' => $filtered_post_ids,
'objects' => $filtered_posts,
Expand All @@ -901,57 +919,6 @@ private function expand_posts( $post_ids ) {
return $posts;
}

/**
* Filters posts and metadata based on maximum size constraints.
* It always allows the first post with its metadata even if they exceed the limit, otherwise they will never be synced.
*
* @access public
*
* @param array $posts The array of posts to filter.
* @param array $metadata The array of metadata to filter.
* @return array An array containing the filtered post IDs, filtered posts, and filtered metadata.
*/
public function filter_posts_and_metadata_max_size( $posts, $metadata ) {
$filtered_posts = array();
$filtered_metadata = array();
$filtered_post_ids = array();
$current_size = 0;
foreach ( $posts as $post ) {
$post_content_size = isset( $post->post_content ) ? strlen( $post->post_content ) : 0;
$current_metadata = array();
$metadata_size = 0;
foreach ( $metadata as $key => $metadata_item ) {
if ( (int) $metadata_item->post_id === $post->ID ) {
// Trimming metadata if it exceeds limit. Similar to trim_post_meta.
$metadata_item_size = strlen( maybe_serialize( $metadata_item->meta_value ) );
if ( $metadata_item_size >= self::MAX_POST_META_LENGTH ) {
$metadata_item->meta_value = '';
}
$current_metadata[] = $metadata_item;
$metadata_size += $metadata_item_size >= self::MAX_POST_META_LENGTH ? 0 : $metadata_item_size;
if ( ! empty( $filtered_post_ids ) && ( $current_size + $post_content_size + $metadata_size ) > ( self::MAX_SIZE_FULL_SYNC ) ) {
break 2; // Break both foreach loops.
}
unset( $metadata[ $key ] );
}
}
// Always allow the first post with its metadata.
if ( empty( $filtered_post_ids ) || ( $current_size + $post_content_size + $metadata_size ) <= ( self::MAX_SIZE_FULL_SYNC ) ) {
$filtered_post_ids[] = strval( $post->ID );
$filtered_posts[] = $post;
$filtered_metadata = array_merge( $filtered_metadata, $current_metadata );
$current_size += $post_content_size + $metadata_size;
} else {
break;
}
}
return array(
$filtered_post_ids,
$filtered_posts,
$filtered_metadata,
);
}

/**
* Set the status of the full sync action based on the objects that were sent.
*
Expand Down
107 changes: 107 additions & 0 deletions projects/packages/sync/tests/php/modules/test-module.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php // phpcs:ignore WordPress.Files.FileName.InvalidClassFileName

/**
* Test file for Automattic\Jetpack\Sync\Modules\Module
*
* @package automattic/jetpack-masterbar
*/

namespace Automattic\Jetpack\Sync;

use WorDBless\BaseTestCase;

/**
* Class Test_Module
*
* @covers Automattic\Jetpack\Sync\Modules\Module
*/
class Test_Module extends BaseTestCase {

/**
* The module instance.
*
* @var \Automattic\Jetpack\Sync\Modules\Module
*/
private $module_instance;

/**
* Runs before every test in this class.
*/
protected function setUp(): void {
parent::setUp();
$this->module_instance = $this->getMockBuilder( 'Automattic\Jetpack\Sync\Modules\Module' )
->onlyMethods( array( 'id_field', 'name' ) )
->getMock();
$this->module_instance->method( 'id_field' )->willReturn( 'ID' );
$this->module_instance->method( 'name' )->willReturn( 'module' );
}
/**
* Test filter_objects_and_metadata_by_size with no constraints of size
*/
public function filter_objects_and_metadata_by_size_no_constraints() {
$objects = array(
(object) array(
'ID' => 1,
'module_title' => 'Post 1',
),
);
$metadata = array(
(object) array(
'module_id' => 1,
'meta_value' => 'meta1',
),
);

$result = $this->module_instance->filter_objects_and_metadata_by_size( 'module', $objects, $metadata, PHP_INT_MAX, PHP_INT_MAX );

$this->assertCount( 1, $result[0] );
$this->assertCount( 1, $result[1] );
$this->assertCount( 1, $result[2] );
}
/**
* Test filter_objects_and_metadata_by_size with constraints of size for metadata
*/
public function test_filter_objects_exceeding_max_meta_size() {
$objects = array(
(object) array(
'ID' => 1,
'module_title' => 'Post 1',
),
);
$metadata = array(
(object) array(
'module_id' => 1,
'meta_value' => str_repeat( 'a', 100 ),
),
);

$result = $this->module_instance->filter_objects_and_metadata_by_size( 'module', $objects, $metadata, 50, 200 );

$this->assertSame( '', $result[2][0]->meta_value );
}
/**
* Test filter_objects_and_metadata_by_size with constraints of size for total size
*/
public function test_filter_objects_exceeding_max_total_size() {
$objects = array(
(object) array(
'ID' => 1,
'module_title' => 'Post 1',
),
(object) array(
'ID' => 2,
'module_title' => 'Post 2',
),
);
$metadata = array(
(object) array(
'module_id' => 1,
'meta_value' => 'meta1',
),
);

$result = $this->module_instance->filter_objects_and_metadata_by_size( 'module', $objects, $metadata, 50, 10 );

$this->assertCount( 1, $result[0] ); // Should only include the first object
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: minor
Type: other

Sync: Full Sync comments now send dynamic chunks if chunk size default is too big
Loading

0 comments on commit d705b87

Please sign in to comment.