diff --git a/src/PHPCR/Util/QOM/Sql2Scanner.php b/src/PHPCR/Util/QOM/Sql2Scanner.php index fd5c563..8639f22 100644 --- a/src/PHPCR/Util/QOM/Sql2Scanner.php +++ b/src/PHPCR/Util/QOM/Sql2Scanner.php @@ -26,13 +26,6 @@ class Sql2Scanner */ protected $tokens; - /** - * Delimiters between tokens. - * - * @var array - */ - protected $delimiters; - /** * Parsing position in the SQL string. * @@ -68,16 +61,6 @@ public function lookupNextToken($offset = 0) return ''; } - /** - * Get the delimiter that separated the two previous tokens. - * - * @return string - */ - public function getPreviousDelimiter() - { - return isset($this->delimiters[$this->curpos - 1]) ? $this->delimiters[$this->curpos - 1] : ' '; - } - /** * Get the next token and remove it from the queue. * Return an empty string when there are no more tokens. @@ -116,12 +99,12 @@ public function expectToken($token, $case_insensitive = true) * Expect the next tokens to be the one given in the array of tokens and * throws an exception if it's not the case. * - * @see expectToken - * * @param array $tokens * @param bool $case_insensitive * * @throws InvalidQueryException + * + * @see expectToken */ public function expectTokens($tokens, $case_insensitive = true) { @@ -151,7 +134,7 @@ public function tokenIs($token, $value, $case_insensitive = true) } /** - * Scan a SQL2 string a extract the tokens. + * Scan a SQL2 string and extract the tokens. 
* * @param string $sql2 * @@ -160,49 +143,72 @@ public function tokenIs($token, $value, $case_insensitive = true) protected function scan($sql2) { $tokens = []; - $token = strtok($sql2, " \n\t"); - while ($token !== false) { - $this->tokenize($tokens, $token); - $token = strtok(" \n\t"); + $currentToken = ''; + $tokenEndChars = ['.', ',', '(', ')', '=']; + + $stringStartCharacter = false; + $isEscaped = false; + $escapedQuotesCount = 0; + foreach (\str_split($sql2) as $index => $character) { + if (!$stringStartCharacter && in_array($character, [' ', "\t", "\n"], true)) { + if ($currentToken !== '') { + $tokens[] = $currentToken; + } + $currentToken = ''; + continue; + } + if (!$stringStartCharacter && in_array($character, $tokenEndChars, true)) { + if ($currentToken !== '') { + $tokens[] = $currentToken; + } + $tokens[] = $character; + $currentToken = ''; + continue; + } + $currentToken .= $character; + + if (!$isEscaped && in_array($character, ['"', "'"], true)) { + // Checking if the previous or next value is a ' to handle the weird SQL strings + // This will not check if the amount of quotes is even + $nextCharacter = $this->getCharacterAtIndex($sql2, $index + 1); + if ($character === "'" && $nextCharacter === "'") { + $isEscaped = true; + $escapedQuotesCount++; + continue; + } + // If the escaped quotes are not paired up. eg. 
"I'''m cool" would be a parsing error + if ($escapedQuotesCount % 2 === 1 && $stringStartCharacter !== "'") { + throw new InvalidQueryException("Syntax error: Number of single quotes to be even: $currentToken"); + } + if ($character === $stringStartCharacter) { + // reached the end of the string + $stringStartCharacter = false; + $tokens[] = $currentToken; + $currentToken = ''; + } elseif (!$stringStartCharacter) { + // If there is no start character already we have found the beginning of a new string + $stringStartCharacter = $character; + } + } + $isEscaped = $character === '\\'; } - - $regexpTokens = []; - foreach ($tokens as $token) { - $regexpTokens[] = preg_quote($token, '/'); + if ($currentToken !== '') { + $tokens[] = $currentToken; } - $regexp = '/^'.implode('([ \t\n]*)', $regexpTokens).'$/'; - preg_match($regexp, $sql2, $this->delimiters); - $this->delimiters[0] = ''; + if ($stringStartCharacter) { + throw new InvalidQueryException("Syntax error: unterminated quoted string $currentToken in '$sql2'"); + } return $tokens; } - /** - * Tokenize a string returned by strtok to split the string at '.', ',', '(', '=' - * and ')' characters. 
- * - * @param array $tokens - * @param string $token - */ - protected function tokenize(&$tokens, $token) + private function getCharacterAtIndex($string, $index) { - $buffer = ''; - for ($i = 0; $i < strlen($token); $i++) { - $char = trim(substr($token, $i, 1)); - if (in_array($char, ['.', ',', '(', ')', '='])) { - if ($buffer !== '') { - $tokens[] = $buffer; - $buffer = ''; - } - $tokens[] = $char; - } else { - $buffer .= $char; - } + if ($index < strlen($string)) { + return $string[$index]; } - if ($buffer !== '') { - $tokens[] = $buffer; - } + return ''; } } diff --git a/src/PHPCR/Util/QOM/Sql2ToQomQueryConverter.php b/src/PHPCR/Util/QOM/Sql2ToQomQueryConverter.php index d3a7a46..a859ebd 100644 --- a/src/PHPCR/Util/QOM/Sql2ToQomQueryConverter.php +++ b/src/PHPCR/Util/QOM/Sql2ToQomQueryConverter.php @@ -756,27 +756,13 @@ protected function parseCastLiteral($token) $this->scanner->expectToken('('); $token = $this->scanner->fetchNextToken(); - $quoteString = false; - if (substr($token, 0, 1) === '\'') { - $quoteString = "'"; - } elseif (substr($token, 0, 1) === '"') { - $quoteString = '"'; - } + $quoteString = $token !== '' && in_array($token[0], ['\'', '"'], true); if ($quoteString) { - while (substr($token, -1) !== $quoteString) { - $nextToken = $this->scanner->fetchNextToken(); - if ('' === $nextToken) { - break; - } - $token .= $nextToken; - } - - if (substr($token, -1) !== $quoteString) { - throw new InvalidQueryException("Syntax error: unterminated quoted string '$token' in '{$this->sql2}'"); - } + $quotesUsed = $token[0]; $token = substr($token, 1, -1); - $token = str_replace('\\'.$quoteString, $quoteString, $token); + // Un-escaping quotes + $token = str_replace('\\'.$quotesUsed, $quotesUsed, $token); } $this->scanner->expectToken('AS'); @@ -813,28 +799,13 @@ protected function parseLiteralValue() return $this->parseCastLiteral($token); } - $quoteString = false; - if (substr($token, 0, 1) === '\'') { - $quoteString = "'"; - } elseif (substr($token, 0, 1) === '"') { - 
$quoteString = '"'; - } + $quoteString = $token !== '' && in_array($token[0], ['"', "'"], true); if ($quoteString) { - while (substr($token, -1) !== $quoteString) { - $nextToken = $this->scanner->fetchNextToken(); - if ('' === $nextToken) { - break; - } - $token .= $this->scanner->getPreviousDelimiter(); - $token .= $nextToken; - } - - if (substr($token, -1) !== $quoteString) { - throw new InvalidQueryException("Syntax error: unterminated quoted string $token in '{$this->sql2}'"); - } + $quotesUsed = $token[0]; $token = substr($token, 1, -1); - $token = str_replace('\\'.$quoteString, $quoteString, $token); + // Unescape quotes + $token = str_replace('\\'.$quotesUsed, $quotesUsed, $token); $token = str_replace("''", "'", $token); if (preg_match('/^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d+)?$/', $token)) { if (preg_match('/^\d{4}-\d{2}-\d{2}$/', $token)) { diff --git a/tests/PHPCR/Tests/Util/QOM/Sql2ScannerTest.php b/tests/PHPCR/Tests/Util/QOM/Sql2ScannerTest.php index d3bda9c..07da5dd 100644 --- a/tests/PHPCR/Tests/Util/QOM/Sql2ScannerTest.php +++ b/tests/PHPCR/Tests/Util/QOM/Sql2ScannerTest.php @@ -2,6 +2,7 @@ namespace PHPCR\Tests\Util\QOM; +use PHPCR\Query\InvalidQueryException; use PHPCR\Util\QOM\Sql2Scanner; use PHPUnit\Framework\TestCase; @@ -21,27 +22,128 @@ public function testToken() 'page', ]; - while ($token = $scanner->fetchNextToken()) { - $this->assertEquals(array_shift($expected), $token); - } + $this->expectTokensFromScanner($scanner, $expected); } - public function testDelimiter() + /** + * @dataProvider dataTestStringTokenization + */ + public function testStringTokenization($query) { - $scanner = new Sql2Scanner('SELECT page.* FROM [nt:unstructured] AS page'); + $scanner = new Sql2Scanner($query); $expected = [ - '', - ' ', - '', - '', - ' ', - ' ', - ' ', - ' ', + 'SELECT', + 'page', + '.', + '*', + 'FROM', + '[nt:unstructured]', + 'AS', + 'page', + 'WHERE', + 'name', + '=', + '"Hello world"', ]; + 
$this->expectTokensFromScanner($scanner, $expected); + } + + public function dataTestStringTokenization() + { + $multilineQuery = <<<'SQL' +SELECT page.* +FROM [nt:unstructured] AS page +WHERE name ="Hello world" +SQL; + + return [ + 'single line query' => ['SELECT page.* FROM [nt:unstructured] AS page WHERE name ="Hello world"'], + 'multi line query' => [$multilineQuery], + ]; + } + + public function testEscapingStrings() + { + $sql = <<expectTokensFromScanner($scanner, $expected); + } + + public function testSQLEscapedStrings() + { + $sql = "WHERE page.name = 'Hello, it''s me.'"; + + $scanner = new Sql2Scanner($sql); + $expected = [ + 'WHERE', + 'page', + '.', + 'name', + '=', + "'Hello, it''s me.'", + ]; + + $this->expectTokensFromScanner($scanner, $expected); + } + + public function testSQLEscapedStrings2() + { + $sql = "WHERE page.name = 'Hello, it''' AND"; + + $scanner = new Sql2Scanner($sql); + $expected = [ + 'WHERE', + 'page', + '.', + 'name', + '=', + "'Hello, it'''", + 'AND', + ]; + + $this->expectTokensFromScanner($scanner, $expected); + } + + public function testThrowingErrorOnUnclosedString() + { + $this->expectException(InvalidQueryException::class); + new Sql2Scanner('SELECT page.* FROM [nt:unstructured] AS page WHERE name ="Hello '); + } + + /** + * Function to assert that the tokens the scanner finds match the expected output + * and the entire expected output is consumed. + * + * @param Sql2Scanner $scanner + * @param array $expected + */ + private function expectTokensFromScanner(Sql2Scanner $scanner, array $expected) + { + $actualTokens = []; while ($token = $scanner->fetchNextToken()) { - $this->assertEquals(array_shift($expected), $scanner->getPreviousDelimiter()); + $actualTokens[] = $token; } + + $this->assertEquals($expected, $actualTokens); } }