-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HTML API: Add CSS selector support #7857
base: trunk
Are you sure you want to change the base?
Changes from all commits
0e8c4fb
2d3d283
40222d3
6092642
3e3b2b2
967557f
2ec1db3
ee2c7ce
0f708ba
3cb455d
5609e50
4f25bc2
943293f
24c9744
a7c10b9
dd718b7
5884aca
a9a077f
5f53e0a
effbbbe
62ec5bb
153f009
fcc6401
21c67e5
728d798
e1e8e09
13ac3c1
a3c25e8
ad5c600
6758704
e97842c
ef00856
463e799
0f5b28c
f4a491a
b680b1b
e7da05f
d5e7e60
08187c6
5a5066c
2f8bd19
8b0ac55
dffcac6
9f81744
d4c6f38
6432056
5c746cd
501102a
f98fbb3
c8f16e1
c689c9c
1221efa
e5e94b1
1e888ba
dd4fcb0
256c55a
465cc36
467d45d
44bfc64
ca4531c
489db93
e57a211
509e648
58c1698
c9b9145
e5cac63
2bafae9
8fe57e3
ab2fe0d
6a6969f
27ca891
9ff2769
d1a276b
4909b56
1d45225
0b277b4
0c53c42
d966e9a
5201ba9
2036a83
3421a4e
784b2d9
4d4c5fe
dbc37fc
d241f31
663070b
5478af9
4f6bf94
fe07dfd
143e092
32ee2a7
5922494
e492aa6
81c6758
7bccf3e
3949cc5
c696889
c193551
9dd8114
b134308
94c06ef
f46fced
1bacfd7
a274ea0
12a0a99
0e2b34a
dea1029
d268f4c
d89fbd9
8ced3aa
ca1a129
71fd62a
4a3e084
25dbb19
70cf7f7
355c9a2
7ef67c1
abb4d25
9ac05b4
3206e0b
46646b5
f217eb0
6154742
577b3a3
5ea93ab
adfebdf
db469e6
400263a
483a819
1f64168
3bfb8a1
8d2aef2
33b8333
d7e840c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,367 @@ | ||
<?php | ||
/** | ||
* HTML API: WP_CSS_Attribute_Selector class | ||
* | ||
* @package WordPress | ||
* @subpackage HTML-API | ||
* @since 6.8.0 | ||
*/ | ||
|
||
/** | ||
* CSS attribute selector. | ||
* | ||
* This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. | ||
* | ||
* @since 6.8.0 | ||
* | ||
* @access private | ||
*/ | ||
final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser_Matcher { | ||
/** | ||
* The attribute value is matched exactly. | ||
* | ||
* @example | ||
* | ||
* [att=val] | ||
*/ | ||
const MATCH_EXACT = 'exact'; | ||
|
||
/** | ||
* The attribute value matches any value in a whitespace separated list of words exactly. | ||
* | ||
* @example | ||
* | ||
* [attr~=value] | ||
*/ | ||
const MATCH_ONE_OF_EXACT = 'one-of'; | ||
|
||
/** | ||
* The attribute value is matched exactly or matches the beginning of the attribute | ||
* immediately followed by a hyphen. | ||
* | ||
* @example | ||
* | ||
* [attr|=value] | ||
*/ | ||
const MATCH_EXACT_OR_HYPHEN_PREFIXED = 'exact-or-hyphen-prefixed'; | ||
|
||
/** | ||
* The attribute value matches the start of the attribute. | ||
* | ||
* @example | ||
* | ||
* [attr^=value] | ||
*/ | ||
const MATCH_PREFIXED_BY = 'prefixed'; | ||
|
||
/** | ||
* The attribute value matches the end of the attribute. | ||
* | ||
* @example | ||
* | ||
* [attr$=value] | ||
*/ | ||
const MATCH_SUFFIXED_BY = 'suffixed'; | ||
|
||
/** | ||
* The attribute value is contained in the attribute. | ||
* | ||
* @example | ||
* | ||
* [attr*=value] | ||
*/ | ||
const MATCH_CONTAINS = 'contains'; | ||
|
||
/** | ||
* Modifier for case sensitive matching. | ||
* | ||
* @example | ||
* | ||
* [attr=value s] | ||
*/ | ||
const MODIFIER_CASE_SENSITIVE = 'case-sensitive'; | ||
|
||
/** | ||
* Modifier for case insensitive matching. | ||
* | ||
* @example | ||
* | ||
* [attr=value i] | ||
*/ | ||
const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; | ||
|
||
/** | ||
* The name of the attribute to match. | ||
* | ||
* @var string | ||
*/ | ||
public $name; | ||
|
||
/** | ||
* The attribute matcher. | ||
* | ||
* Allowed string values are the class constants: | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_EXACT} | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT} | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED} | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY} | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY} | ||
* - {@see WP_CSS_Attribute_Selector::MATCH_CONTAINS} | ||
* | ||
* @var string|null | ||
*/ | ||
public $matcher; | ||
|
||
/** | ||
* The attribute value to match. | ||
* | ||
* @var string|null | ||
*/ | ||
public $value; | ||
|
||
/** | ||
* The attribute modifier. | ||
* | ||
* Allowed string values are the class constants: | ||
* - {@see WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE} | ||
* - {@see WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE} | ||
* | ||
* @var string|null | ||
*/ | ||
public $modifier; | ||
|
||
/**
 * Constructor.
 *
 * The constructor is private; within this class, instances are created by
 * {@see WP_CSS_Attribute_Selector::parse()}.
 *
 * @param string      $name     The name of the attribute to match.
 * @param string|null $matcher  Optional. How the value is compared. One of the class
 *                              MATCH_* constants, or null. Default null.
 * @param string|null $value    Optional. The attribute value to match, or null. Default null.
 * @param string|null $modifier Optional. The case modifier. One of the class
 *                              MODIFIER_* constants, or null. Default null.
 */
private function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) {
	$this->modifier = $modifier;
	$this->value    = $value;
	$this->matcher  = $matcher;
	$this->name     = $name;
}
|
||
/**
 * Determines if the processor's current position matches the selector.
 *
 * The attribute named by this selector is read from the processor and compared
 * against the selector value according to the configured matcher and case modifier.
 *
 * For the substring matchers (`^=`, `$=`, `*=`) an empty selector value never
 * matches, as required by the CSS Selectors specification.
 *
 * @param WP_HTML_Tag_Processor $processor The processor.
 * @return bool True if the processor's current position matches the selector.
 */
public function matches( WP_HTML_Tag_Processor $processor ): bool {
	$att_value = $processor->get_attribute( $this->name );

	// The attribute is not present on the tag.
	if ( null === $att_value ) {
		return false;
	}

	// Without a value to compare (`[attr]`), presence of the attribute is sufficient.
	if ( null === $this->value ) {
		return true;
	}

	// A `true` attribute value is compared as the empty string.
	if ( true === $att_value ) {
		$att_value = '';
	}

	$case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier;
	$att_length       = strlen( $att_value );
	$value_length     = strlen( $this->value );

	switch ( $this->matcher ) {
		case self::MATCH_EXACT:
			return $case_insensitive
				? 0 === strcasecmp( $att_value, $this->value )
				: $att_value === $this->value;

		case self::MATCH_ONE_OF_EXACT:
			foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) {
				if (
					$case_insensitive
						? 0 === strcasecmp( $val, $this->value )
						: $val === $this->value
				) {
					return true;
				}
			}
			return false;

		case self::MATCH_EXACT_OR_HYPHEN_PREFIXED:
			// Attempt the full match first.
			if (
				$case_insensitive
					? 0 === strcasecmp( $att_value, $this->value )
					: $att_value === $this->value
			) {
				return true;
			}

			// Otherwise the attribute must begin with the value immediately followed by a hyphen.
			$starts_with = "{$this->value}-";
			return (
				$att_length >= strlen( $starts_with ) &&
				0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive )
			);

		case self::MATCH_PREFIXED_BY:
			// An empty value must not match anything; a shorter attribute cannot match.
			return (
				0 !== $value_length &&
				$att_length >= $value_length &&
				0 === substr_compare( $att_value, $this->value, 0, $value_length, $case_insensitive )
			);

		case self::MATCH_SUFFIXED_BY:
			// An empty value must not match anything; a shorter attribute cannot match.
			return (
				0 !== $value_length &&
				$att_length >= $value_length &&
				0 === substr_compare( $att_value, $this->value, -$value_length, $value_length, $case_insensitive )
			);

		case self::MATCH_CONTAINS:
			/*
			 * An empty value must not match anything. Checking here also avoids
			 * searching for an empty needle, whose result differs between PHP versions.
			 */
			if ( 0 === $value_length ) {
				return false;
			}

			return false !== (
				$case_insensitive
					? stripos( $att_value, $this->value )
					: strpos( $att_value, $this->value )
			);
	}

	// An unrecognized matcher cannot match; this also satisfies the bool return type.
	return false;
}
|
||
/**
 * Lazily yields each item of a list delimited by the characters in
 * `self::WHITESPACE_CHARACTERS`.
 *
 * Leading, trailing, and repeated delimiter characters are skipped, so no
 * empty items are produced.
 *
 * This is useful for the {@see WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT} matcher.
 *
 * @param string $input The string to split.
 *
 * @return Generator<string>
 */
private function whitespace_delimited_list( string $input ): Generator {
	$end = strlen( $input );

	// Begin at the first byte that is not a delimiter.
	$at = strspn( $input, self::WHITESPACE_CHARACTERS );

	while ( $at < $end ) {
		// The item runs until the next delimiter or the end of the input.
		$item_length = strcspn( $input, self::WHITESPACE_CHARACTERS, $at );
		$item        = substr( $input, $at, $item_length );

		// Step over the item, then over any delimiters that follow it.
		$at += $item_length;
		$at += strspn( $input, self::WHITESPACE_CHARACTERS, $at );

		yield $item;
	}
}
|
||
/**
 * Parses an attribute selector, such as `[attr]` or `[attr^="value" i]`, from a selector string.
 *
 * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method.
 *
 * @param string $input  The selector string.
 * @param int    $offset The offset into the string. The offset is passed by reference and
 *                       is only updated if the parse is successful, in which case it points
 *                       just past the closing `]`.
 * @return static|null The selector instance, or null if the parse was unsuccessful.
 */
public static function parse( string $input, int &$offset ) {
	$input_length = strlen( $input );

	// The shortest possible attribute selector is 3 bytes long: `[x]`.
	if ( $offset + 2 >= $input_length ) {
		return null;
	}

	// Work on a copy so the caller's offset is only changed on success.
	$updated_offset = $offset;

	if ( '[' !== $input[ $updated_offset ] ) {
		return null;
	}
	++$updated_offset;

	self::parse_whitespace( $input, $updated_offset );
	$attr_name = self::parse_ident( $input, $updated_offset );
	if ( null === $attr_name ) {
		return null;
	}
	self::parse_whitespace( $input, $updated_offset );

	if ( $updated_offset >= $input_length ) {
		return null;
	}

	// Presence-only selector: `[attr]`.
	if ( ']' === $input[ $updated_offset ] ) {
		$offset = $updated_offset + 1;
		return new self( $attr_name );
	}

	/*
	 * At least 3 more bytes are required to match the shortest remainder, `=x]`.
	 * This guarantees that the bytes at `$updated_offset + 1` and
	 * `$updated_offset + 2` exist, and accepts a minimal selector such as
	 * `[a=x]` that ends exactly at the end of the input.
	 */
	if ( $updated_offset + 2 >= $input_length ) {
		return null;
	}

	if ( '=' === $input[ $updated_offset ] ) {
		$attr_matcher = self::MATCH_EXACT;
		++$updated_offset;
	} elseif ( '=' === $input[ $updated_offset + 1 ] ) {
		switch ( $input[ $updated_offset ] ) {
			case '~':
				$attr_matcher = self::MATCH_ONE_OF_EXACT;
				break;
			case '|':
				$attr_matcher = self::MATCH_EXACT_OR_HYPHEN_PREFIXED;
				break;
			case '^':
				$attr_matcher = self::MATCH_PREFIXED_BY;
				break;
			case '$':
				$attr_matcher = self::MATCH_SUFFIXED_BY;
				break;
			case '*':
				$attr_matcher = self::MATCH_CONTAINS;
				break;
			default:
				return null;
		}

		// Every two-byte matcher (`~=`, `|=`, `^=`, `$=`, `*=`) advances by the same amount.
		$updated_offset += 2;
	} else {
		return null;
	}

	// The value is either a quoted string or an identifier.
	self::parse_whitespace( $input, $updated_offset );
	$attr_val =
		self::parse_string( $input, $updated_offset ) ??
		self::parse_ident( $input, $updated_offset );

	if ( null === $attr_val ) {
		return null;
	}

	self::parse_whitespace( $input, $updated_offset );
	if ( $updated_offset >= $input_length ) {
		return null;
	}

	// Optional case modifier: `i` or `s`, in either letter case.
	$attr_modifier = null;
	switch ( $input[ $updated_offset ] ) {
		case 'i':
		case 'I':
			$attr_modifier = self::MODIFIER_CASE_INSENSITIVE;
			++$updated_offset;
			break;

		case 's':
		case 'S':
			$attr_modifier = self::MODIFIER_CASE_SENSITIVE;
			++$updated_offset;
			break;
	}

	if ( null !== $attr_modifier ) {
		self::parse_whitespace( $input, $updated_offset );
		if ( $updated_offset >= $input_length ) {
			return null;
		}
	}

	// Anything other than the closing bracket at this point is a parse failure.
	if ( ']' !== $input[ $updated_offset ] ) {
		return null;
	}

	$offset = $updated_offset + 1;
	return new self( $attr_name, $attr_matcher, $attr_val, $attr_modifier );
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this reads like the hyphen comes first, but in the CSS selector, it specifically connotes that a hyphen follows the match. would
HYPHEN_SUFFIXED
be more accurate?