diff --git a/lib/experimental/html/class-wp-html-span.php b/lib/experimental/html/class-wp-html-span.php new file mode 100644 index 00000000000000..39e603662b17b9 --- /dev/null +++ b/lib/experimental/html/class-wp-html-span.php @@ -0,0 +1,52 @@ +start = $start; + $this->end = $end; + } +} diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 5dc5981ef15f78..affbb6fb27b5c1 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -180,6 +180,25 @@ * @since 6.2.0 */ class WP_HTML_Tag_Processor { + /** + * The maximum number of bookmarks allowed to exist at + * any given time. + * + * @see set_bookmark(); + * @since 6.2.0 + * @var int + */ + const MAX_BOOKMARKS = 10; + + /** + * Maximum number of times seek() can be called. + * Prevents accidental infinite loops. + * + * @see seek() + * @since 6.2.0 + * @var int + */ + const MAX_SEEK_OPS = 1000; /** * The HTML document to parse. @@ -349,11 +368,11 @@ class WP_HTML_Tag_Processor { * * Example: * - * // Add the `WP-block-group` class, remove the `WP-group` class. - * $class_changes = [ + * // Add the `wp-block-group` class, remove the `wp-group` class. + * $classname_updates = [ * // Indexed by a comparable class name - * 'wp-block-group' => new WP_Class_Name_Operation( 'WP-block-group', WP_Class_Name_Operation::ADD ), - * 'wp-group' => new WP_Class_Name_Operation( 'WP-group', WP_Class_Name_Operation::REMOVE ) + * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, + * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS * ]; * * @@ -362,6 +381,15 @@ class WP_HTML_Tag_Processor { */ private $classname_updates = array(); + /** + * Tracks a semantic location in the original HTML which + * shifts with updates as they are applied to the document. 
+ * + * @since 6.2.0 + * @var WP_HTML_Span[] + */ + private $bookmarks = array(); + const ADD_CLASS = true; const REMOVE_CLASS = false; const SKIP_CLASS = null; @@ -396,6 +424,16 @@ class WP_HTML_Tag_Processor { */ private $attribute_updates = array(); + /** + * Tracks how many times we've performed a `seek()` + * so that we can prevent accidental infinite loops. + * + * @see seek + * @since 6.2.0 + * @var int + */ + private $seek_count = 0; + /** * Constructor. * @@ -479,6 +517,123 @@ public function next_tag( $query = null ) { return true; } + + /** + * Sets a bookmark in the HTML document. + * + * Bookmarks represent specific places or tokens in the HTML + * document, such as a tag opener or closer. When applying + * edits to a document, such as setting an attribute, the + * text offsets of that token may shift; the bookmark is + * kept updated with those shifts and remains stable unless + * the entire span of text in which the token sits is removed. + * + * Release bookmarks when they are no longer needed. + * + * Example: + * ``` + *

<main><h2>Surprising fact you may not know!</h2></main>
+ * ^ ^ + * \-|-- this `H2` opener bookmark tracks the token + * + *

<main class="clickbait"><h2>Surprising fact you may no… + * ^ ^ + * \-|-- it shifts with edits + * ``` + * + * Bookmarks provide the ability to seek to a previously-scanned + * place in the HTML document. This avoids the need to re-scan + * the entire thing. + * + * Example: + * ``` + * <ul><li>One</li><li>Two</li><li>Three</li></ul> + * ^^^^ + * want to note this last item + * + * $p = new WP_HTML_Tag_Processor( $html ); + * $in_list = false; + * while ( $p->next_tag( [ 'tag_closers' => $in_list ? 'visit' : 'skip' ] ) ) { + * if ( 'UL' === $p->get_tag() ) { + * if ( $p->is_tag_closer() ) { + * $in_list = false; + * $p->set_bookmark( 'resume' ); + * if ( $p->seek( 'last-li' ) ) { + * $p->add_class( 'last-li' ); + * } + * $p->seek( 'resume' ); + * $p->release_bookmark( 'last-li' ); + * $p->release_bookmark( 'resume' ); + * } else { + * $in_list = true; + * } + * } + * + * if ( 'LI' === $p->get_tag() ) { + * $p->set_bookmark( 'last-li' ); + * } + * } + * ``` + * + * Because bookmarks maintain their position they don't + * expose any internal offsets for the HTML document + * and can't be used with normal string functions. + * + * Because bookmarks allocate memory and require processing + * for every applied update they are limited and require + * a name. They should not be created inside a loop. + * + * Bookmarks are a powerful tool to enable complicated behavior; + * consider double-checking that you need this tool if you are + * reaching for it, as inappropriate use could lead to broken + * HTML structure or unwanted processing overhead. + * + * @param string $name Identifies this particular bookmark. + * @return bool Whether the bookmark was successfully created. + * @throws Exception Throws when the bookmark limit is exceeded if WP_DEBUG set. + */ + public function set_bookmark( $name ) { + if ( null === $this->tag_name_starts_at ) { + return false; + } + + if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( "Tried to jump to a non-existent HTML bookmark {$name}." 
); + } + return false; + } + + $this->bookmarks[ $name ] = new WP_HTML_Span( + $this->tag_name_starts_at - 1, + $this->tag_ends_at + ); + + return true; + } + + + /** + * Removes a bookmark if you no longer need to use it. + * + * Releasing a bookmark frees up the small performance + * overhead they require, mainly in the form of compute + * costs when modifying the document. + * + * @param string $name Name of the bookmark to remove. + * @return bool + */ + public function release_bookmark( $name ) { + if ( ! array_key_exists( $name, $this->bookmarks ) ) { + return false; + } + + unset( $this->bookmarks[ $name ] ); + + return true; + } + + /** * Skips the contents of the title and textarea tags until an appropriate * tag closer is found. @@ -1104,9 +1259,77 @@ private function apply_attributes_updates() { $this->updated_bytes = $diff->end; } + foreach ( $this->bookmarks as $bookmark ) { + /** + * As we loop through $this->attribute_updates, we keep comparing + * $bookmark->start and $bookmark->end to $diff->start. We can't + * change it and still expect the correct result, so let's accumulate + * the deltas separately and apply them all at once after the loop. + */ + $head_delta = 0; + $tail_delta = 0; + + foreach ( $this->attribute_updates as $diff ) { + $update_head = $bookmark->start >= $diff->start; + $update_tail = $bookmark->end >= $diff->start; + + if ( ! $update_head && ! $update_tail ) { + break; + } + + $delta = strlen( $diff->text ) - ( $diff->end - $diff->start ); + + if ( $update_head ) { + $head_delta += $delta; + } + + if ( $update_tail ) { + $tail_delta += $delta; + } + } + + $bookmark->start += $head_delta; + $bookmark->end += $tail_delta; + } + $this->attribute_updates = array(); } + /** + * Move the current pointer in the Tag Processor to a given bookmark's location. + * + * In order to prevent accidental infinite loops, there's a + * maximum limit on the number of times seek() can be called. 
+ * + * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. + * @return bool + * @throws Exception Throws on invalid bookmark name if WP_DEBUG set. + */ + public function seek( $bookmark_name ) { + if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( 'Invalid bookmark name' ); + } + return false; + } + + if ( ++$this->seek_count > self::MAX_SEEK_OPS ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( 'Too many calls to seek() - this can lead to performance issues.' ); + } + return false; + } + + // Flush out any pending updates to the document. + $this->get_updated_html(); + + // Point this tag processor before the sought tag opener and consume it. + $this->parsed_bytes = $this->bookmarks[ $bookmark_name ]->start; + $this->updated_bytes = $this->parsed_bytes; + $this->updated_html = substr( $this->html, 0, $this->updated_bytes ); + return $this->next_tag(); + } + /** * Sort function to arrange objects with a start property in ascending order. * @@ -1411,47 +1634,31 @@ public function __toString() { * @return string The processed HTML. */ public function get_updated_html() { - // Short-circuit if there are no updates to apply. + // Short-circuit if there are no new updates to apply. if ( ! count( $this->classname_updates ) && ! count( $this->attribute_updates ) ) { return $this->updated_html . substr( $this->html, $this->updated_bytes ); } - /* - * Parsing is in progress – let's apply the attribute updates without moving on to the next tag. - * - * In practice: - * 1. Apply the attributes updates to the original HTML - * 2. Replace the original HTML with the updated HTML - * 3. Point this tag processor to the current tag name's end in that updated HTML - */ - - // Find tag name's end in the updated markup. - $markup_updated_up_to_a_tag_name_end = $this->updated_html . 
substr( $this->html, $this->updated_bytes, $this->tag_name_starts_at + $this->tag_name_length - $this->updated_bytes ); - $updated_tag_name_ends_at = strlen( $markup_updated_up_to_a_tag_name_end ); - $updated_tag_name_starts_at = $updated_tag_name_ends_at - $this->tag_name_length; + // Otherwise: apply the updates, rewind before the current tag, and parse it again. + $delta_between_updated_html_end_and_current_tag_end = substr( + $this->html, + $this->updated_bytes, + $this->tag_name_starts_at + $this->tag_name_length - $this->updated_bytes + ); + $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; - // Apply attributes updates. - $this->updated_html = $markup_updated_up_to_a_tag_name_end; - $this->updated_bytes = $this->tag_name_starts_at + $this->tag_name_length; + // 1. Apply the attributes updates to the original HTML $this->class_name_updates_to_attributes_updates(); $this->apply_attributes_updates(); - // Replace $this->html with the updated markup. - $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes ); + // 2. Replace the original HTML with the updated HTML + $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes ); + $this->updated_html = $updated_html_up_to_current_tag_name_end; + $this->updated_bytes = strlen( $this->updated_html ); - // Rewind this processor to the tag name's end. - $this->tag_name_starts_at = $updated_tag_name_starts_at; - $this->parsed_bytes = $updated_tag_name_ends_at; - - // Restore the previous version of the updated_html as we are not finished with the current_tag yet. - $this->updated_html = $markup_updated_up_to_a_tag_name_end; - $this->updated_bytes = $updated_tag_name_ends_at; - - // Parse the attributes in the updated markup. - $this->attributes = array(); - while ( $this->parse_next_attribute() ) { - continue; - } + // 3. 
Point this tag processor at the original tag opener and consume it + $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 2; + $this->next_tag(); return $this->html; } diff --git a/lib/experimental/html/index.php b/lib/experimental/html/index.php index e7d41f8cdf4863..a31dbaf48c6b2a 100644 --- a/lib/experimental/html/index.php +++ b/lib/experimental/html/index.php @@ -7,5 +7,6 @@ // All class files necessary for the HTML Tag Processor. require_once __DIR__ . '/class-wp-html-attribute-token.php'; +require_once __DIR__ . '/class-wp-html-span.php'; require_once __DIR__ . '/class-wp-html-text-replacement.php'; require_once __DIR__ . '/class-wp-html-tag-processor.php'; diff --git a/phpunit/html/wp-html-tag-processor-bookmark-test.php b/phpunit/html/wp-html-tag-processor-bookmark-test.php new file mode 100644 index 00000000000000..e1c4b005ce47cd --- /dev/null +++ b/phpunit/html/wp-html-tag-processor-bookmark-test.php @@ -0,0 +1,370 @@ +
  • One
  • Two
  • Three
  • ' ); + $p->next_tag( 'li' ); + $this->assertTrue( $p->set_bookmark( 'first li' ), 'Could not allocate a "first li" bookmark.' ); + $p->next_tag( 'li' ); + $this->assertTrue( $p->set_bookmark( 'second li' ), 'Could not allocate a "second li" bookmark.' ); + $this->assertTrue( $p->set_bookmark( 'first li' ), 'Could not move the "first li" bookmark.' ); + } + + /** + * @ticket 56299 + * + * @covers release_bookmark + */ + public function test_release_bookmark() { + $p = new WP_HTML_Tag_Processor( '' ); + $p->next_tag( 'li' ); + $this->assertFalse( $p->release_bookmark( 'first li' ), 'Released a non-existing bookmark.' ); + $p->set_bookmark( 'first li' ); + $this->assertTrue( $p->release_bookmark( 'first li' ), 'Could not release a bookmark.' ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_seek() { + $p = new WP_HTML_Tag_Processor( '' ); + $p->next_tag( 'li' ); + $p->set_bookmark( 'first li' ); + + $p->next_tag( 'li' ); + $p->set_attribute( 'foo-2', 'bar-2' ); + + $p->seek( 'first li' ); + $p->set_attribute( 'foo-1', 'bar-1' ); + + $this->assertEquals( + '', + $p->get_updated_html() + ); + } + + /** + * WP_HTML_Tag_Processor used to test for the diffs affecting + * the adjusted bookmark position while simultaneously adjusting + * the bookmark in question. As a result, updating the bookmarks + * of a next tag while removing two subsequent attributes in + * a previous tag unfolded like this: + * + * 1. Check if the first removed attribute is before the bookmark: + * + * + * ^-------------------^ ^ + * diff applied here the bookmark is here + * + * (Yes it is) + * + * 2. Move the bookmark to the left by the attribute length: + * + * + * ^ + * the bookmark is here + * + * 3. Check if the second removed attribute is before the bookmark: + * + * + * ^ ^-----^ + * bookmark diff + * + * This time, it isn't! + * + * The fix in the WP_HTML_Tag_Processor involves doing all the checks + * before moving the bookmark. 
This test is here to guard us from + * the erroneous behavior accidentally returning one day. + * + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + * @covers apply_attributes_updates + */ + public function test_removing_long_attributes_doesnt_break_seek() { + $input = << +HTML; + $p = new WP_HTML_Tag_Processor( $input ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'first' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'second' ); + + $this->assertTrue( + $p->seek( 'first' ), + 'Seek() to the first button has failed' + ); + $p->remove_attribute( 'twenty_one_characters' ); + $p->remove_attribute( '7_chars' ); + + $this->assertTrue( + $p->seek( 'second' ), + 'Seek() to the second button has failed' + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_bookmarks_complex_use_case() { + $input = << +
    +
    +
    + + + + + + + +
    +
    +
    +HTML; + $expected_output = << +
    +
    +
    + + + + + + + +
    +
    +
    +HTML; + $p = new WP_HTML_Tag_Processor( $input ); + $p->next_tag( 'div' ); + $p->next_tag( 'div' ); + $p->next_tag( 'div' ); + $p->set_bookmark( 'first div' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'first button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'second button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'third button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'fourth button' ); + + $p->seek( 'first button' ); + $p->set_attribute( 'type', 'submit' ); + + $this->assertTrue( + $p->seek( 'third button' ), + 'Seek() to the third button failed' + ); + $p->remove_attribute( 'class' ); + $p->remove_attribute( 'type' ); + $p->remove_attribute( 'aria-expanded' ); + $p->set_attribute( 'id', 'rebase-and-merge' ); + $p->remove_attribute( 'data-details-container' ); + + $this->assertTrue( + $p->seek( 'first div' ), + 'Seek() to the first div failed' + ); + $p->set_attribute( 'checked', false ); + + $this->assertTrue( + $p->seek( 'fourth button' ), + 'Seek() to the fourth button failed' + ); + $p->set_attribute( 'id', 'last-button' ); + $p->remove_attribute( 'class' ); + $p->remove_attribute( 'type' ); + $p->remove_attribute( 'checked' ); + $p->remove_attribute( 'aria-label' ); + $p->remove_attribute( 'disabled' ); + $p->remove_attribute( 'data-view-component' ); + + $this->assertTrue( + $p->seek( 'second button' ), + 'Seek() to the second button failed' + ); + $p->remove_attribute( 'type' ); + $p->set_attribute( 'class', 'hx_create-pr-button' ); + + $this->assertEquals( + $expected_output, + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_additions_after_both_sides() { + $p = new WP_HTML_Tag_Processor( '
    First
    Second
    ' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->add_class( 'second' ); + + $p->seek( 'first' ); + $p->add_class( 'first' ); + + $this->assertEquals( + '
    First
    Second
    ', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_additions_before_both_sides() { + $p = new WP_HTML_Tag_Processor( '
    First
    Second
    ' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->set_bookmark( 'second' ); + + $p->seek( 'first' ); + $p->add_class( 'first' ); + + $p->seek( 'second' ); + $p->add_class( 'second' ); + + $this->assertEquals( + '
    First
    Second
    ', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_deletions_after_both_sides() { + $p = new WP_HTML_Tag_Processor( '
    First
    Second
    ' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->remove_attribute( 'disabled' ); + + $p->seek( 'first' ); + $p->set_attribute( 'untouched', true ); + + $this->assertEquals( + /** @TODO: we shouldn't have to assert the extra space after removing the attribute. */ + '
    First
    Second
    ', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_deletions_before_both_sides() { + $p = new WP_HTML_Tag_Processor( '
    First
    Second
    ' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->set_bookmark( 'second' ); + + $p->seek( 'first' ); + $p->remove_attribute( 'disabled' ); + + $p->seek( 'second' ); + $p->set_attribute( 'safe', true ); + + $this->assertEquals( + /** @TODO: we shouldn't have to assert the extra space after removing the attribute. */ + '
    First
    Second
    ', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_bookmark + */ + public function test_limits_the_number_of_bookmarks() { + $p = new WP_HTML_Tag_Processor( '' ); + $p->next_tag( 'li' ); + + $this->expectException( Exception::class ); + + for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) { + $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); + } + + $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); + } + + /** + * @ticket 56299 + * + * @covers seek + */ + public function test_limits_the_number_of_seek_calls() { + $p = new WP_HTML_Tag_Processor( '' ); + $p->next_tag( 'li' ); + $p->set_bookmark( 'bookmark' ); + + $this->expectException( Exception::class ); + + for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) { + $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); + } + $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." ); + } +} diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-test.php index 273cbddddea4bf..6a850eb7a4edb2 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/wp-html-tag-processor-test.php @@ -1296,18 +1296,24 @@ public function data_malformed_tag() { ); $examples['Multiple unclosed tags treated as a single tag'] = array( - '
    -test', - '
    -test', + << + test +HTML + , + << + test +HTML + , ); $examples['27'] = array(