diff --git a/src/store.php b/src/store.php index 30c6aed..35f9297 100755 --- a/src/store.php +++ b/src/store.php @@ -158,9 +158,7 @@ public static function tokenizer($query) { ) | (? - (?(?(?:.(?!(? @@ -170,20 +168,20 @@ public static function tokenizer($query) { (? \) ) -)/x +)/xi REGEX; do { $result = preg_match($token, $query, $matches, PREG_OFFSET_CAPTURE); if ($result) { - $query = substr($query, strlen($matches[0][0])); - // todo: swap filters, first remove numeric keys + $value = $matches[0][0]; + $offset = $matches[0][1]; + $query = substr($query, strlen($value) + $offset); yield array_filter( - array_filter($matches, function($match) { - return $match[0]; - }), - function($key) { - return !is_int($key); - }, ARRAY_FILTER_USE_KEY + $matches, + function($val, $key) { + return !is_int($key) && $val[0]; + }, + ARRAY_FILTER_USE_BOTH ); } } while($result); diff --git a/src/store/MySQLQueryParser.php b/src/store/MySQLQueryParser.php index 73cc235..a7cec46 100755 --- a/src/store/MySQLQueryParser.php +++ b/src/store/MySQLQueryParser.php @@ -18,42 +18,40 @@ public function __construct($tokenizer) { public function parse($query) { $indent = 0; - $part = ''; - $sql = ''; + $sql = []; $position = 0; $expect = 'name|parenthesis_open|not'; foreach( call_user_func($this->tokenizer, $query) as $token ) { - $type = key($token); - list($token, $offset)=$token[$type]; - if ( !preg_match("/^$expect$/",$type) ) { - throw new \LogicException('Parse error at '.$position.': expected '.$expect.', got '.$type.': ' + $tokenType = key($token); + list($tokenValue, $offset)=$token[$tokenType]; + if ( !preg_match("/^$expect$/",$tokenType) ) { + throw new \LogicException('Parse error at '.$position.': expected '.$expect.', got '.$tokenType.': ' .(substr($query,0, $position)." --> ".substr($query,$position)) ); } - switch($type) { + switch($tokenType) { case 'number': case 'string': - $sql .= $part.$token; - $part = ''; + $sql[] = $tokenValue; $expect = 'operator|parenthesis_close'; break; case 'name': - switch ($token) { + switch ($tokenValue) { case 'nodes.path': case 'nodes.parent': case 'nodes.name': case 'nodes.mtime': case 'nodes.ctime': - $part = $token; + $sql[] = $tokenValue; break; default: - $part = "JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.".$token."'))"; + $sql[] = "JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.".$tokenValue."'))"; break; } $expect = 'compare'; break; case 'compare': - switch( $token ) { + switch( $tokenValue ) { case '>': case '>=': case '<': @@ -61,36 +59,38 @@ public function parse($query) case '=': case '<>': case '!=': - $part.=$token; + $sql[] =$tokenValue; break; case '?': - $part.= ' IS NOT NULL'; - str_replace($part, '->>', '->'); + $part = $sql[count($sql)-1]; + $part = str_replace('->>', '->', $part); + $sql[count($sql)-1] = $part; + $sql[] = 'IS NOT NULL'; break; case '~=': - $part.=' like '; + $sql[] = 'like'; break; case '!~': - $part.=' not like '; + $sql[] = 'not like'; break; } $expect = 'number|string'; break; case 'not': - $sql .= $token; + $sql[] = $tokenValue; $expect = 'name|parenthesis_open'; break; case 'operator': - $sql .= ' '.$token.' '; + $sql[] = $tokenValue; $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_open': - $sql .= $token; + $sql[] = $tokenValue; $indent++; $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_close': - $sql .= $token; + $sql[] = $tokenValue; $indent--; if ( $indent>0 ) { $expect = 'operator|parenthesis_close'; @@ -99,16 +99,23 @@ public function parse($query) } break; } - $position += $offset + strlen($token); + $position += $offset + strlen($tokenValue); } if ( $indent!=0 ) { throw new \LogicException('unbalanced parenthesis'); - } else if ( trim($part) ) { - $position -= strlen($token); - throw new \LogicException('parse error at '.$position.': '.(substr($query,0, $position)." --> ".substr($query,$position))); - } else { - return $sql; + } + if ($position ".substr($query,$position)) ); } + foreach(['number','string','compare'] as $tokenType) { + if (strpos($expect, $tokenType)!==false) { + throw new \LogicException('Parse error at '.$position.': expected '.$expect.': ' + .(substr($query,0, $position)." --> ".substr($query,$position)) ); + + } + } + return implode(' ',$sql); } } diff --git a/src/store/MySQLStore.php b/src/store/MySQLStore.php index a98c4de..73f0a99 100755 --- a/src/store/MySQLStore.php +++ b/src/store/MySQLStore.php @@ -21,6 +21,9 @@ final class MySQLStore implements Store { public function __construct($db = null, $queryParser = null, $resultHandler = null, $path = '/') { $this->db = $db; + if ($this->db) { + $this->db->setAttribute(\PDO::MYSQL_ATTR_USE_BUFFERED_QUERY, false); + } $this->queryParser = $queryParser; $this->resultHandler = $resultHandler; $this->path = \arc\path::collapse($path); diff --git a/src/store/PSQLQueryParser.php b/src/store/PSQLQueryParser.php index 3a45525..0ef60f0 100755 --- a/src/store/PSQLQueryParser.php +++ b/src/store/PSQLQueryParser.php @@ -18,42 +18,39 @@ public function __construct($tokenizer) { public function parse($query) { $indent = 0; - $part = ''; - $sql = ''; + $sql = []; $position = 0; $expect = 'name|parenthesis_open|not'; - foreach( call_user_func($this->tokenizer, $query) as $token ) { - $type = key($token); - list($token, $offset)=$token[$type]; - if ( !preg_match("/^$expect$/",$type) ) { - throw new \LogicException('Parse error at '.$position.': expected '.$expect.', got '.$type.': ' + $tokenType = key($token); + list($tokenValue, $offset)=$token[$tokenType]; + if ( !preg_match("/^$expect$/",$tokenType) ) { + throw new \LogicException('Parse error at '.$position.': expected '.$expect.', got '.$tokenType.': ' .(substr($query,0, $position)." --> ".substr($query,$position)) ); } - switch($type) { + switch($tokenType) { case 'number': case 'string': - $sql .= $part.$token; - $part = ''; + $sql[] = $tokenValue; $expect = 'operator|parenthesis_close'; break; case 'name': - switch ($token) { + switch ($tokenValue) { case 'nodes.path': case 'nodes.parent': case 'nodes.name': case 'nodes.mtime': case 'nodes.ctime': - $part = $token; + $sql[] = $tokenValue; break; default: - $part = "nodes.data #>> '{".str_replace('.',',',$token)."}'"; + $sql[] = "nodes.data #>> '{".str_replace('.',',',$tokenValue)."}'"; break; } $expect = 'compare'; break; case 'compare': - switch( $token ) { + switch( $tokenValue ) { case '>': case '>=': case '<': @@ -61,36 +58,38 @@ public function parse($query) case '=': case '<>': case '!=': - $part.=$token; + $sql[] = $tokenValue; break; case '?': - $part.=$token; - str_replace($part, '#>>', '#>'); + $part = $sql[count($sql)-1]; + $part = str_replace('#>>', '#>', $part); + $sql[count($sql)-1] = $part; + $sql[] = $tokenValue; break; case '~=': - $part.=' like '; + $sql[] = 'like'; break; case '!~': - $part.=' not like '; + $sql[] = 'not like'; break; } $expect = 'number|string'; break; case 'not': - $sql .= $token; + $sql[] = $tokenValue; $expect = 'name|parenthesis_open'; break; case 'operator': - $sql .= ' '.$token.' '; + $sql[] = $tokenValue; $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_open': - $sql .= $token; + $sql[] = $tokenValue; $indent++; $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_close': - $sql .= $token; + $sql[] = $tokenValue; $indent--; if ( $indent>0 ) { $expect = 'operator|parenthesis_close'; @@ -99,16 +98,23 @@ public function parse($query) } break; } - $position += $offset + strlen($token); + $position += $offset + strlen($tokenValue); } if ( $indent!=0 ) { throw new \LogicException('unbalanced parenthesis'); - } else if ( trim($part) ) { - $position -= strlen($token); - throw new \LogicException('parse error at '.$position.': '.(substr($query,0, $position)." --> ".substr($query,$position))); - } else { - return $sql; } + if ($position ".substr($query,$position)) ); + } + foreach(['number','string','compare'] as $tokenType) { + if (strpos($expect, $tokenType)!==false) { + throw new \LogicException('Parse error at '.$position.': expected '.$expect.': ' + .(substr($query,0, $position)." --> ".substr($query,$position)) ); + + } + } + return implode(' ',$sql); } } diff --git a/src/store/ResultHandlers.php b/src/store/ResultHandlers.php index d337fe1..892d551 100644 --- a/src/store/ResultHandlers.php +++ b/src/store/ResultHandlers.php @@ -28,7 +28,7 @@ public static function getDBGeneratorHandler($db) $result = $q->execute($args); $data = $q->fetch(\PDO::FETCH_ASSOC); if (!$data) { - yield $data; + return $data; } while ($data) { $value = (object) $data; diff --git a/src/store/Tokenizer.php b/src/store/Tokenizer.php deleted file mode 100644 index dc5bd41..0000000 --- a/src/store/Tokenizer.php +++ /dev/null @@ -1,82 +0,0 @@ - - /* - query syntax: - part = ( name '.' )* name compare value - query = part | part operator (not) part | (not) '(' query ')' - operator = 'and' | 'or' - not = 'not' - compare = '<' | '>' | '=' | '>=' | '<=' | 'like' | 'not like' | '?' - value = number | string - number = [0-9]* ('.' [0-9]+)? - string = \' [^\\1]* \' - - e.g: "contact.address.street like '%Crescent%' and ( name.firstname = 'Foo' or name.lastname = 'Bar')" - */ - - /** - * yields the tokens in the search query expression - * @param string $query - * @return \Generator - * @throws \LogicException - */ - - private function tokens($query) - { - $token = <<<'REGEX' -/^\s* -( - (? - <= | >= | <> | < | > | = | != | ~= | !~ | \? - ) - | - (? - and | or - ) - | - (? - not - ) - | - (? - [a-z]+[a-z0-9_-]* - (?: \. [a-z]+[a-z0-9_-]* )* - ) - | - (? - [+-]?[0-9](\.[0-9]+)? - ) - | - (? - (?(?(?:.(?!(? - \( - ) - | - (? - \) - ) -)/x -REGEX; - do { - $result = preg_match($token, $query, $matches, PREG_OFFSET_CAPTURE); - if ($result) { - $query = substr($query, strlen($matches[0][0])); - // todo: swap filters, first remove numeric keys - yield array_filter( - array_filter($matches, function($match) { - return $match[0]; - }), - function($key) { - return !is_int($key); - }, ARRAY_FILTER_USE_KEY - ); - } - } while($result); - if ( trim($query) ) { - throw new \LogicException('Could not parse '.$query); - } - } diff --git a/src/store/TreeQueryParser.php b/src/store/TreeQueryParser.php index d1045ed..9bc10b7 100644 --- a/src/store/TreeQueryParser.php +++ b/src/store/TreeQueryParser.php @@ -19,7 +19,7 @@ public function parse($query) { $indent = 0; $part = ''; - $check = ''; + $fn = []; $currentCheck = []; $position = 0; $expect = 'name|parenthesis_open|not'; @@ -35,9 +35,9 @@ public function parse($query) case 'number': case 'string': if (strpos($part, '{placeholder}')!==false) { - $check .= str_replace('{placeholder}', $token, $part); + $fn[] = str_replace('{placeholder}', $token, $part); } else { - $check .= $part.$token; + $fn[] = $part.$token; } $part = ''; $expect = 'operator|parenthesis_close'; @@ -93,27 +93,27 @@ public function parse($query) $expect = 'number|string'; break; case 'not': - $check .= '!'; + $fn[] = '!'; $expect = 'name|parenthesis_open'; break; case 'operator': switch($token) { case 'and': - $check .= ' && '; + $fn[] = '&&'; break; case 'or': - $check .= ' || '; + $fn[] = '||'; break; } $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_open': - $check .= $token; + $fn[] = $token; $indent++; $expect = 'name|parenthesis_open|not'; break; case 'parenthesis_close': - $check .= $token; + $fn[] = $token; $indent--; if ( $indent>0 ) { $expect = 'operator|parenthesis_close'; @@ -130,7 +130,13 @@ public function parse($query) $position -= strlen($token); throw new \LogicException('parse error at '.$position.': '.(substr($query,0, $position)." --> ".substr($query,$position))); } else { - return $check; + $fn = implode(' ',$fn); + $like = function($haystack, $needle) { + $re = str_replace('%', '.*', $needle); + return preg_match('|'.$re.'|i', $haystack); + }; + $script = 'return function($node) use ($like) { return '.$fn.'; };'; + return eval($script); } } diff --git a/src/store/TreeStore.php b/src/store/TreeStore.php index 354824a..c53ce2f 100644 --- a/src/store/TreeStore.php +++ b/src/store/TreeStore.php @@ -44,8 +44,8 @@ public function find($query, $path='') { $path = \arc\path::collapse($path, $this->path); $root = $this->tree->cd($path); - $fn = $this->queryParser->parse($query, $path); - return call_user_func($this->resultHandler, $fn ); + $f = $this->queryParser->parse($query, $path); + return call_user_func($this->resultHandler, $f ); } /** @@ -154,13 +154,7 @@ public function delete($path = '') { public static function getResultHandler($tree) { - $like = function($haystack, $needle) { - $re = str_replace('%', '.*', $needle); - return preg_match('|'.$re.'|i', $haystack); - }; - return function($fn) use ($tree, $like) { - $script = 'return function($node) use ($like) { return '.$fn.'; };'; - $callback = eval($script); + return function($callback) use ($tree) { $dataset = \arc\tree::filter($tree, $callback); foreach($dataset as $path => $node) { $node = json_decode(json_encode($node),false); diff --git a/tests/store.Test.php b/tests/store.Test.php index b250705..40e89c1 100755 --- a/tests/store.Test.php +++ b/tests/store.Test.php @@ -23,7 +23,6 @@ class TestStore extends PHPUnit\Framework\TestCase public static function setupBeforeClass() : void { - self::$tree = \arc\tree::expand([]); self::$store = new \arc\store\TreeStore( self::$tree, @@ -32,6 +31,7 @@ public static function setupBeforeClass() : void ); // self::$store = \arc\store::connect(self::$dns[0]); self::$store->initialize(); + } function __construct() @@ -39,70 +39,78 @@ function __construct() parent::__construct(); } - function testTreeQuery() + function testPostgresqlStoreQuery() { - $qp = new \arc\store\TreeQueryParser(array('\arc\store','tokenizer')); + $qp = new \arc\store\PSQLQueryParser(array('\arc\store','tokenizer')); $result = $qp->parse("nodes.path='/'"); - $this->assertEquals("( \$node->getPath() ?? null ) =='/'", $result); + $this->assertEquals("nodes.path = '/'", $result); $result = $qp->parse("foo.bar='baz'"); - $this->assertEquals("( \$node->nodeValue->foo->bar ?? null ) =='baz'", $result); + $this->assertEquals("nodes.data #>> '{foo,bar}' = 'baz'", $result); $result = $qp->parse("foo.bar !~ 'b%z'"); - $this->assertEquals("!\$like(\$node->nodeValue->foo->bar ?? null,'b%z')", $result); + $this->assertEquals("nodes.data #>> '{foo,bar}' not like 'b%z'", $result); $result = $qp->parse("foo.bar ~= 'b%z'"); - $this->assertEquals("\$like(\$node->nodeValue->foo->bar ?? null,'b%z')", $result); + $this->assertEquals("nodes.data #>> '{foo,bar}' like 'b%z'", $result); $result = $qp->parse("foo ? 'bar'"); - $this->assertEquals("property_exists(\$node->nodeValue->foo ?? null,'bar')", $result); + $this->assertEquals("nodes.data #> '{foo}' ? 'bar'", $result); $result = $qp->parse("foo.bar>3"); - $this->assertEquals("( \$node->nodeValue->foo->bar ?? null ) >3",$result); + $this->assertEquals("nodes.data #>> '{foo,bar}' > 3",$result); $result = $qp->parse("foo.bar <> 'bar\\'bar'"); - $this->assertEquals("( \$node->nodeValue->foo->bar ?? null ) !='bar\\'bar'",$result); + $this->assertEquals("nodes.data #>> '{foo,bar}' <> 'bar\\'bar'",$result); $result = $qp->parse("foo.bar != 'bar\\'bar'"); - $this->assertEquals("( \$node->nodeValue->foo->bar ?? null ) !='bar\\'bar'",$result); + $this->assertEquals("nodes.data #>> '{foo,bar}' != 'bar\\'bar'",$result); $result = $qp->parse("foo.bar !~ 'b%z' and bar.foo = 3"); - $this->assertEquals("!\$like(\$node->nodeValue->foo->bar ?? null,'b%z') && ( \$node->nodeValue->bar->foo ?? null ) ==3", $result); + $this->assertEquals("nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}' = 3", $result); $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3)"); - $this->assertEquals("(!\$like(\$node->nodeValue->foo->bar ?? null,'b%z') && ( \$node->nodeValue->bar->foo ?? null ) ==3)", $result); - $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3) or nodes.path='/'"); - $this->assertEquals("(!\$like(\$node->nodeValue->foo->bar ?? null,'b%z') && ( \$node->nodeValue->bar->foo ?? null ) ==3) || ( \$node->getPath() ?? null ) =='/'", $result); + $this->assertEquals("( nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}' = 3 )", $result); + $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3) or nodes.path = '/'"); + $this->assertEquals("( nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}' = 3 ) or nodes.path = '/'", $result); $result = $qp->parse("not(foo.bar = 'bar')"); - $this->assertEquals("!(( \$node->nodeValue->foo->bar ?? null ) =='bar')", $result); + $this->assertEquals("not ( nodes.data #>> '{foo,bar}' = 'bar' )", $result); } - - function testStoreQuery() + + function testMysqlStoreQuery() { - $qp = new \arc\store\PSQLQueryParser(array('\arc\store','tokenizer')); + $qp = new \arc\store\MySQLQueryParser(array('\arc\store','tokenizer')); $result = $qp->parse("nodes.path='/'"); - $this->assertEquals("nodes.path='/'", $result); + $this->assertEquals("nodes.path = '/'", $result); $result = $qp->parse("foo.bar='baz'"); - $this->assertEquals("nodes.data #>> '{foo,bar}'='baz'", $result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) = 'baz'", $result); $result = $qp->parse("foo.bar !~ 'b%z'"); - $this->assertEquals("nodes.data #>> '{foo,bar}' not like 'b%z'", $result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) not like 'b%z'", $result); $result = $qp->parse("foo.bar ~= 'b%z'"); - $this->assertEquals("nodes.data #>> '{foo,bar}' like 'b%z'", $result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) like 'b%z'", $result); $result = $qp->parse("foo ? 'bar'"); - $this->assertEquals("nodes.data #>> '{foo}'?'bar'", $result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo')) IS NOT NULL 'bar'", $result); $result = $qp->parse("foo.bar>3"); - $this->assertEquals("nodes.data #>> '{foo,bar}'>3",$result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) > 3",$result); $result = $qp->parse("foo.bar <> 'bar\\'bar'"); - $this->assertEquals("nodes.data #>> '{foo,bar}'<>'bar\\'bar'",$result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) <> 'bar\'bar'",$result); $result = $qp->parse("foo.bar != 'bar\\'bar'"); - $this->assertEquals("nodes.data #>> '{foo,bar}'!='bar\\'bar'",$result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) != 'bar\'bar'",$result); $result = $qp->parse("foo.bar !~ 'b%z' and bar.foo = 3"); - $this->assertEquals("nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}'=3", $result); + $this->assertEquals("JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) not like 'b%z' and JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.bar.foo')) = 3", $result); $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3)"); - $this->assertEquals("(nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}'=3)", $result); - $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3) or nodes.path='/'"); - $this->assertEquals("(nodes.data #>> '{foo,bar}' not like 'b%z' and nodes.data #>> '{bar,foo}'=3) or nodes.path='/'", $result); + $this->assertEquals("( JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) not like 'b%z' and JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.bar.foo')) = 3 )", $result); + $result = $qp->parse("(foo.bar !~ 'b%z' and bar.foo = 3) or nodes.path = '/'"); + $this->assertEquals("( JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) not like 'b%z' and JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.bar.foo')) = 3 ) or nodes.path = '/'", $result); $result = $qp->parse("not(foo.bar = 'bar')"); - $this->assertEquals("not(nodes.data #>> '{foo,bar}'='bar')", $result); + $this->assertEquals("not ( JSON_UNQUOTE(JSON_EXTRACT( nodes.data, '$.foo.bar')) = 'bar' )", $result); } + function testMaliciousQueries() + { + $qp = new \arc\store\PSQLQueryParser(array('\arc\store','tokenizer')); + $this->expectException(\LogicException::class); + $result = $qp->parse("nodes.path=''/'"); + } + function testStoreParseError() { $qp = new \arc\store\PSQLQueryParser(array('\arc\store','tokenizer')); $this->expectException(\LogicException::class); $result = $qp->parse("just_a_name_with_1_number"); + echo $result."\n"; } function testStoreParseParenthesisError()