Skip to content

Commit

Permalink
extend _cast_or_op
Browse files Browse the repository at this point in the history
  • Loading branch information
moregan authored and wchristian committed May 14, 2017
1 parent 8155ca7 commit c276409
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 67 deletions.
2 changes: 2 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ Revision history for Perl extension PPI
- fixed parsing of large numbers in Number::Exp on Solaris 80 (JMASLAK)
- make remove_child actually return undef on failure to find child to
remove
- higher accuracy when deciding whether certain characters are operators
or variable type casts (*&% etc.) (MOREGAN)

1.220 Tue 11 Nov 2014
Summary:
Expand Down
125 changes: 90 additions & 35 deletions lib/PPI/Token/Unknown.pm
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,14 @@ sub __TOKENIZER__on_char {
}
}

if ( $char eq '$' ) {
my $_class = $self->_cast_or_op( $t );
# Set class and rerun
$t->{class} = $t->{token}->set_class( $_class );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}

if ( $char eq '*' || $char eq '=' ) {
# Power operator '**' or mult-assign '*='
$t->{class} = $t->{token}->set_class( 'Operator' );
return 1;
}

return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );

Expand Down Expand Up @@ -176,18 +171,13 @@ sub __TOKENIZER__on_char {
# Get rest of line
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# control-character symbol (e.g. @{^_Foo})
# control-character symbol (e.g. %{^_Foo})
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
}

if ( $char =~ /[\$@%*{]/ ) {
# It's a cast
$t->{class} = $t->{token}->set_class( 'Cast' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );

}
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

# Probably the mod operator
$t->{class} = $t->{token}->set_class( 'Operator' );
Expand All @@ -209,11 +199,7 @@ sub __TOKENIZER__on_char {
return 1;
}

if ( $char =~ /[\$@%{]/ ) {
# The ampersand is a cast
$t->{class} = $t->{token}->set_class( 'Cast' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

# Probably the binary and operator
$t->{class} = $t->{token}->set_class( 'Operator' );
Expand Down Expand Up @@ -271,26 +257,95 @@ sub __TOKENIZER__on_char {
PPI::Exception->throw('Unknown value in PPI::Token::Unknown token');
}

sub _is_cast_or_op {
my ( $self, $char ) = @_;
return 1 if $char eq '$';
return 1 if $char eq '@';
return 1 if $char eq '%';
return 1 if $char eq '*';
return 1 if $char eq '{';
return;
}

sub _as_cast_or_op {
my ( $self, $t ) = @_;
my $class = _cast_or_op( $t );
$t->{class} = $t->{token}->set_class( $class );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}

sub _prev_significant_w_cursor {
my ( $tokens, $cursor, $extra_check ) = @_;
while ( $cursor >= 0 ) {
my $token = $tokens->[ $cursor-- ];
next if !$token->significant;
next if $extra_check and !$extra_check->($token);
return ( $token, $cursor );
}
return ( undef, $cursor );
}

# Operator/operand-sensitive, multiple or GLOB cast
sub _cast_or_op {
my ( undef, $t ) = @_;
my ( $prev ) = @{ $t->_previous_significant_tokens(1) };
return 'Cast' if !$prev;

return 'Operator' if
$prev->isa('PPI::Token::Symbol')
or
$prev->isa('PPI::Token::Number')
or
(
$prev->isa('PPI::Token::Structure')
and
$prev->content =~ /^(?:\)|\])$/
my ( $t ) = @_;

my $tokens = $t->{tokens};
my $cursor = scalar( @$tokens ) - 1;
my $token;

( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Cast' if !$token; # token was first in the document

if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) {

# Scan the token stream backwards an arbitrarily long way,
# looking for the matching opening curly brace.
my $structure_depth = 1;
( $token, $cursor ) = _prev_significant_w_cursor(
$tokens, $cursor,
sub {
my ( $token ) = @_;
return if !$token->isa( 'PPI::Token::Structure' );
if ( $token eq '}' ) {
$structure_depth++;
return;
}
if ( $token eq '{' ) {
$structure_depth--;
return if $structure_depth;
}
return 1;
}
);
return 'Operator' if !$token; # no matching '{', probably an unbalanced '}'

# Scan past any whitespace
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Operator' if !$token; # Document began with what must be a hash constructor.
return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript

my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @;
return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript

my $content = $token->content;
my $produces_or_wants_value =
( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) );
return $produces_or_wants_value ? 'Operator' : 'Cast';
}

my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @;
return 'Cast'
if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content }
or $token->isa( 'PPI::Token::Cast' )
or $token->isa( 'PPI::Token::Operator' )
or $token->isa( 'PPI::Token::Label' );

return 'Operator' if !$token->isa( 'PPI::Token::Word' );

( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Cast' if !$token || $token->content ne '->';

# This is pretty weak, there's room for a dozen more tests before going with
# a default. Or even better, a proper operator/operand method :(
return 'Cast';
return 'Operator';
}

# Are we at a location where a ':' would indicate a subroutine attribute
Expand Down
44 changes: 12 additions & 32 deletions t/ppi_token_unknown.t
Original file line number Diff line number Diff line change
Expand Up @@ -100,36 +100,28 @@ OPERATOR_CAST: {
test_varying_whitespace( @nothing, @asterisk_cast, @scalar );
}

{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @number, @percent_op, @scalar );
test_varying_whitespace( @number, @percent_op, @list );
test_varying_whitespace( @number, @percent_op, @hash );
test_varying_whitespace( @number, @percent_op, @glob );
test_varying_whitespace( @number, @percent_op, @hashctor1 );
test_varying_whitespace( @number, @percent_op, @hashctor2 );
test_varying_whitespace( @number, @percent_op, @hashctor3 );
}
test_varying_whitespace( @number, @percenteq_op, @bareword );
test_varying_whitespace( @number, @percenteq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator
{
local %known_bad_seps = map { $_ => 1 } qw( space );
test_varying_whitespace( @nothing, @percent_cast, @scalar );
}

{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @number, @ampersand_op, @scalar );
test_varying_whitespace( @number, @ampersand_op, @list );
test_varying_whitespace( @number, @ampersand_op, @hash );
}

test_varying_whitespace( @number, @ampersand_op, @glob );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @number, @ampersand_op, @hashctor1 );
test_varying_whitespace( @number, @ampersand_op, @hashctor2 );
test_varying_whitespace( @number, @ampersand_op, @hashctor3 );
}
test_varying_whitespace( @number, @ampersandeq_op, @bareword );
test_varying_whitespace( @number, @ampersandeq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator
{
Expand All @@ -156,27 +148,28 @@ OPERATOR_CAST: {
}

my @single = ( "'3'", [ 'PPI::Token::Quote::Single' => "'3'", ] );
test_varying_whitespace( @single, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @single, @asterisk_op, @scalar );
test_varying_whitespace( @single, @asterisk_op, @hashctor3 );
}
test_varying_whitespace( @single, @percent_op, @scalar );
test_varying_whitespace( @single, @percent_op, @hashctor3 );
test_varying_whitespace( @single, @ampersand_op, @scalar );
test_varying_whitespace( @single, @ampersand_op, @hashctor3 );

my @double = ( '"3"', [ 'PPI::Token::Quote::Double' => '"3"', ] );
test_varying_whitespace( @double, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @double, @asterisk_op, @hashctor3 );
}
test_varying_whitespace( @double, @percent_op, @scalar );
test_varying_whitespace( @double, @percent_op, @hashctor3 );
test_varying_whitespace( @double, @ampersand_op, @scalar );
test_varying_whitespace( @double, @ampersand_op, @hashctor3 );
}

test_varying_whitespace( @scalar, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @scalar, @percent_op, @scalar );
test_varying_whitespace( @scalar, @ampersand_op, @scalar );

Expand All @@ -192,7 +185,7 @@ OPERATOR_CAST: {
]
);
{
local %known_bad_seps = ( %known_bad_seps, map { $_ => 1 } qw( space ) );
local %known_bad_seps = map { $_ => 1 } qw( null space );
test_varying_whitespace( @package, @asterisk_cast, @scalar, 1 );
test_varying_whitespace( @package, @asterisk_cast, @hashctor3, 1 );
test_varying_whitespace( @package, @percent_cast, @scalar, 1 );
Expand All @@ -201,7 +194,6 @@ OPERATOR_CAST: {
test_varying_whitespace( @package, @ampersand_cast, @hashctor3, 1 );
test_varying_whitespace( @package, @at_cast, @scalar, 1 );
test_varying_whitespace( @package, @at_cast, @listctor, 1 );
}
}

my @sub = (
Expand Down Expand Up @@ -300,10 +292,11 @@ OPERATOR_CAST: {
'PPI::Token::Structure' => '}',
]
);
test_varying_whitespace( @evalblock, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @evalblock, @asterisk_op, @scalar );
test_varying_whitespace( @evalblock, @asterisk_op, @hashctor3 );
}
test_varying_whitespace( @evalblock, @percent_op, @scalar );
test_varying_whitespace( @evalblock, @percent_op, @hashctor3 );
test_varying_whitespace( @evalblock, @ampersand_op, @scalar );
Expand All @@ -317,12 +310,14 @@ OPERATOR_CAST: {
]
);
test_varying_whitespace( @evalstring, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @evalstring, @asterisk_op, @hashctor3 );
}
test_varying_whitespace( @evalstring, @percent_op, @scalar );
test_varying_whitespace( @evalstring, @percent_op, @hashctor3 );
test_varying_whitespace( @evalstring, @ampersand_op, @scalar );
test_varying_whitespace( @evalstring, @ampersand_op, @hashctor3 );
}

my @curly_subscript1 = (
'$y->{x}',
Expand Down Expand Up @@ -383,8 +378,6 @@ OPERATOR_CAST: {
]
);

{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @curly_subscript1, @asterisk_op, @scalar );
test_varying_whitespace( @curly_subscript1, @percent_op, @scalar );
test_varying_whitespace( @curly_subscript1, @ampersand_op, @scalar );
Expand All @@ -394,13 +387,9 @@ OPERATOR_CAST: {
test_varying_whitespace( @curly_subscript3, @asterisk_op, @scalar );
test_varying_whitespace( @curly_subscript3, @percent_op, @scalar );
test_varying_whitespace( @curly_subscript3, @ampersand_op, @scalar );
}
test_varying_whitespace( @square_subscript1, @asterisk_op, @scalar );
{
local %known_bad_seps = map { $_ => 1 } qw( null );
test_varying_whitespace( @square_subscript1, @percent_op, @scalar );
test_varying_whitespace( @square_subscript1, @ampersand_op, @scalar );
}

{
local %known_bad_seps = map { $_ => 1 } qw( space );
Expand All @@ -411,8 +400,6 @@ OPERATOR_CAST: {
test_varying_whitespace( 'values', [ 'PPI::Token::Word' => 'values' ], @percent_cast, @hashctor3 );
}

TODO: {
local $TODO = "known bug";
test_statement(
'} *$a', # unbalanced '}' before '*', arbitrary decision
[
Expand All @@ -423,7 +410,6 @@ TODO: {
'PPI::Token::Symbol' => '$a',
]
);
}

test_statement(
'$bar = \%*$foo', # multiple consecutive casts
Expand All @@ -437,8 +423,6 @@ TODO: {
]
);

TODO: {
local $TODO = "known bug";
test_statement(
'$#tmp*$#tmp2',
[
Expand All @@ -447,7 +431,6 @@ TODO: {
'PPI::Token::ArrayIndex' => '$#tmp2',
]
);
}

test_statement(
'[ %{$req->parameters} ]', # preceded by '['
Expand Down Expand Up @@ -484,8 +467,6 @@ TODO: {
]
);

TODO: {
local $TODO = "known bug";
test_statement(
'++$i%$f', # '%' wrongly a cast through 1.220.
[
Expand All @@ -496,7 +477,6 @@ TODO: {
'PPI::Token::Symbol' => '$f',
]
);
}

{ # these need to be fixed in PPI::Lexer->_statement, fixing these will break other tests that need to be changed
local $TODO = "clarify type of statement in constructor";
Expand Down

0 comments on commit c276409

Please sign in to comment.