From aa3b7cf57a1b10e4181af48e32984f403fb96821 Mon Sep 17 00:00:00 2001 From: Ryan Weaver Date: Wed, 12 Apr 2023 09:59:51 -0400 Subject: [PATCH] Expanding Twig syntax to rely less on regex for better errors --- src/TwigComponent/src/Twig/ComponentLexer.php | 132 +------- src/TwigComponent/src/Twig/TwigPreLexer.php | 287 ++++++++++++++++++ .../tests/Unit/TwigPreLexerTest.php | 65 ++++ 3 files changed, 354 insertions(+), 130 deletions(-) create mode 100644 src/TwigComponent/src/Twig/TwigPreLexer.php create mode 100644 src/TwigComponent/tests/Unit/TwigPreLexerTest.php diff --git a/src/TwigComponent/src/Twig/ComponentLexer.php b/src/TwigComponent/src/Twig/ComponentLexer.php index 16b9cc67323..519e4290899 100644 --- a/src/TwigComponent/src/Twig/ComponentLexer.php +++ b/src/TwigComponent/src/Twig/ComponentLexer.php @@ -25,18 +25,10 @@ */ class ComponentLexer extends Lexer { - public const ATTRIBUTES_REGEX = '(?(?:\s+[\w\-:.@]+(=(?:\\\"[^\\\"]*\\\"|\'[^\']*\'|[^\'\\\"=<>]+))?)*\s*)'; - public const OPEN_TAGS_REGEX = '/<\s*t:(?([[\w\-\:\.]+))\s*'.self::ATTRIBUTES_REGEX.'(\s?)+>/'; - public const CLOSE_TAGS_REGEX = '/<\/\s*t:([\w\-\:\.]+)\s*>/'; - public const SELF_CLOSE_TAGS_REGEX = '/<\s*t:(?([\w\-\:\.]+))\s*'.self::ATTRIBUTES_REGEX.'(\s?)+\/>/'; - public const BLOCK_TAGS_OPEN = '/<\s*t:block\s+name=("|\')(?([\w\-\:\.]+))("|\')\s*>/'; - public const BLOCK_TAGS_CLOSE = '/<\s*\/\s*t:block\s*>/'; - public const ATTRIBUTE_BAG_REGEX = '/(?:^|\s+)\{\{\s*(attributes(?:.+?(?[\w\-:.@]+)(=(?(\"[^\"]+\"|\\\'[^\\\']+\\\'|[^\s>]+)))?/x'; - public function tokenize(Source $source): TokenStream { - $preparsed = $this->preparsed($source->getCode()); + $preLexer = new TwigPreLexer(); + $preparsed = $preLexer->preLexComponents($source->getCode()); return parent::tokenize( new Source( @@ -46,124 +38,4 @@ public function tokenize(Source $source): TokenStream ) ); } - - private function preparsed(string $value) - { - $value = $this->lexBlockTags($value); - $value = $this->lexBlockTagsClose($value); - $value = $this->lexSelfCloseTag($value); - $value = $this->lexOpeningTags($value); - $value = $this->lexClosingTag($value); - - return $value; - } - - private function lexOpeningTags(string $value) - { - return preg_replace_callback( - self::OPEN_TAGS_REGEX, - function (array $matches) { - $name = $matches['name']; - $attributes = $this->getAttributesFromAttributeString($matches['attributes']); - - return '{% component '.$name.' with '.$attributes.'%}'; - }, - $value - ); - } - - private function lexClosingTag(string $value) - { - return preg_replace(self::CLOSE_TAGS_REGEX, '{% endcomponent %}', $value); - } - - private function lexSelfCloseTag(string $value) - { - return preg_replace_callback( - self::SELF_CLOSE_TAGS_REGEX, - function (array $matches) { - $name = $matches['name']; - $attributes = $this->getAttributesFromAttributeString($matches['attributes']); - - return "{{ component('".$name."', ".$attributes.') }}'; - }, - $value - ); - } - - private function lexBlockTags(string $value) - { - return preg_replace_callback( - self::BLOCK_TAGS_OPEN, - function (array $matches) { - $name = $matches['name']; - - return '{% block '.$name.' %}'; - }, - $value - ); - } - - private function lexBlockTagsClose(string $value) - { - return preg_replace( - self::BLOCK_TAGS_CLOSE, - '{% endblock %}', - $value - ); - } - - protected function getAttributesFromAttributeString(string $attributeString) - { - $attributeString = $this->parseAttributeBag($attributeString); - - if (!preg_match_all(self::ATTRIBUTE_KEY_VALUE_REGEX, $attributeString, $matches, \PREG_SET_ORDER)) { - return '{}'; - } - - $attributes = []; - foreach ($matches as $match) { - $attribute = $match['attribute']; - $value = $match['value'] ?? null; - - if (null === $value) { - $value = 'true'; - } - - if (str_starts_with($attribute, ':')) { - $attribute = str_replace(':', '', $attribute); - $value = $this->stripQuotes($value); - } - - $valueWithoutQuotes = $this->stripQuotes($value); - - if (str_starts_with($valueWithoutQuotes, '{{') && (strpos($valueWithoutQuotes, '}}') === \strlen($valueWithoutQuotes) - 2)) { - $value = substr($valueWithoutQuotes, 2, -2); - } else { - $value = $value; - } - - $attributes[$attribute] = $value; - } - - $out = '{'; - foreach ($attributes as $key => $value) { - $key = "'$key'"; - $out .= "$key: $value,"; - } - - return rtrim($out, ',').'}'; - } - - public function stripQuotes(string $value) - { - return str_starts_with($value, '"') || str_starts_with($value, '\'') - ? substr($value, 1, -1) - : $value; - } - - protected function parseAttributeBag(string $attributeString) - { - return preg_replace(self::ATTRIBUTE_BAG_REGEX, ' :attributes="$1"', $attributeString); - } } diff --git a/src/TwigComponent/src/Twig/TwigPreLexer.php b/src/TwigComponent/src/Twig/TwigPreLexer.php new file mode 100644 index 00000000000..c450b3112c7 --- /dev/null +++ b/src/TwigComponent/src/Twig/TwigPreLexer.php @@ -0,0 +1,287 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\UX\TwigComponent\Twig; + +/** + * Rewrites syntaxes to {% component %} syntaxes. + */ +class TwigPreLexer +{ + private string $input; + private int $length; + private int $position = 0; + private int $line; + /** @var string[] */ + private array $currentComponents = []; + + public function __construct(int $startingLine = 1) + { + $this->line = $startingLine; + } + + public function preLexComponents(string $input): string + { + $this->input = $input; + $this->length = strlen($input); + $output = ''; + + while ($this->position < $this->length) { + if ($this->consume('consumeComponentName(); + + if ($componentName === 'block') { + $output .= $this->consumeBlock(); + + continue; + } + + $attributes = $this->consumeAttributes(); + $isSelfClosing = $this->consume('/>'); + if (!$isSelfClosing) { + $this->consume('>'); + $this->currentComponents[] = $componentName; + } + + $output .= "{% component {$componentName}" . ($attributes ? " with { {$attributes} }" : '') . " %}"; + if ($isSelfClosing) { + $output .= '{% endcomponent %}'; + } + + continue; + } + + if (!empty($this->currentComponents) && $this->check('consume('consumeComponentName(); + $this->consume('>'); + + $lastComponent = array_pop($this->currentComponents); + + if ($closingComponentName !== $lastComponent) { + throw new \RuntimeException("Expected closing tag '' but found '' at line {$this->line}"); + } + + $output .= "{% endcomponent %}"; + + continue; + } + + $char = $this->consumeChar(); + if ($char === "\n") { + $this->line++; + } + $output .= $char; + } + + return $output; + } + + private function consumeComponentName(): string + { + $start = $this->position; + while ($this->position < $this->length && preg_match('/[A-Za-z0-9_]/', $this->input[$this->position])) { + $this->position++; + } + $componentName = substr($this->input, $start, $this->position - $start); + + if (empty($componentName)) { + throw new \RuntimeException("Expected component name at line {$this->line}"); + } + + return $componentName; + } + + private function consumeAttributes(): string + { + $attributes = []; + + while ($this->position < $this->length && !$this->check('>') && !$this->check('/>')) { + $this->consumeWhitespace(); + if ($this->check('>') || $this->check('/>')) { + break; + } + + $isAttributeDynamic = false; + + // :someProp="dynamicVar" + if ($this->check(':')) { + $this->consume(':'); + $isAttributeDynamic = true; + } + + $key = $this->consumeComponentName(); + + // -> someProp: true + if (!$this->check('=')) { + $attributes[] = sprintf("%s: true", $key); + $this->consumeWhitespace(); + continue; + } + + $this->expectAndConsumeChar('='); + $quote = $this->consumeChar(["'", '"']); + + // someProp="{{ dynamicVar }}" + if ($this->consume('{{')) { + $this->consumeWhitespace(); + $attributeValue = rtrim($this->consumeUntil('}')); + $this->expectAndConsumeChar('}'); + $this->expectAndConsumeChar('}'); + $this->consumeUntil($quote); + $isAttributeDynamic = true; + } else { + $attributeValue = $this->consumeUntil($quote); + } + $this->expectAndConsumeChar($quote); + + if ($isAttributeDynamic) { + $attributes[] = sprintf("%s: %s", $key, $attributeValue); + } else { + $attributes[] = sprintf("%s: '%s'", $key, str_replace("'", "\'", $attributeValue)); + } + + $this->consumeWhitespace(); + } + + return implode(', ', $attributes); + } + + private function consume(string $string): bool + { + if (substr($this->input, $this->position, strlen($string)) === $string) { + $this->position += strlen($string); + return true; + } + + return false; + } + + private function consumeChar($validChars = null): string + { + if ($this->position >= $this->length) { + throw new \RuntimeException("Unexpected end of input"); + } + + $char = $this->input[$this->position]; + + if ($validChars !== null && !in_array($char, (array)$validChars, true)) { + throw new \RuntimeException("Expected one of [" . implode('', (array)$validChars) . "] but found '{$char}' at line {$this->line}"); + } + + $this->position++; + + return $char; + } + + private function consumeUntil(string $endString): string + { + $start = $this->position; + $endCharLength = strlen($endString); + + while ($this->position < $this->length) { + if (substr($this->input, $this->position, $endCharLength) === $endString) { + break; + } + + if ($this->input[$this->position] === "\n") { + $this->line++; + } + $this->position++; + } + + return substr($this->input, $start, $this->position - $start); + } + + private function consumeWhitespace(): void + { + while ($this->position < $this->length && preg_match('/\s/', $this->input[$this->position])) { + if ($this->input[$this->position] === "\n") { + $this->line++; + } + $this->position++; + } + } + + /** + * Checks that the next character is the one given and consumes it. + */ + private function expectAndConsumeChar(string $char): void + { + if (strlen($char) !== 1) { + throw new \InvalidArgumentException('Expected a single character'); + } + + if ($this->position >= $this->length || $this->input[$this->position] !== $char) { + throw new \RuntimeException("Expected '{$char}' but found '{$this->input[$this->position]}' at line {$this->line}"); + } + $this->position++; + } + + private function check(string $chars): bool + { + $charsLength = strlen($chars); + if ($this->position + $charsLength > $this->length) { + return false; + } + + for ($i = 0; $i < $charsLength; $i++) { + if ($this->input[$this->position + $i] !== $chars[$i]) { + return false; + } + } + + return true; + } + + private function consumeBlock(): string + { + $attributes = $this->consumeAttributes(); + $this->consume('>'); + + $blockName = ''; + foreach (explode(', ', $attributes) as $attr) { + list($key, $value) = explode(': ', $attr); + if ($key === 'name') { + $blockName = trim($value, "'"); + break; + } + } + + if (empty($blockName)) { + throw new \RuntimeException("Expected block name at line {$this->line}"); + } + + $output = "{% block {$blockName} %}"; + + $closingTag = ""; + if (!$this->doesStringEventuallyExist($closingTag)) { + throw new \RuntimeException("Expected closing tag '{$closingTag}' for block '{$blockName}' at line {$this->line}"); + } + $blockContents = $this->consumeUntil($closingTag); + + $subLexer = new self($this->line); + $output .= $subLexer->preLexComponents($blockContents); + + $this->consume($closingTag); + $output .= "{% endblock %}"; + + return $output; + } + + private function doesStringEventuallyExist(string $needle): bool + { + $remainingString = substr($this->input, $this->position); + + return str_contains($remainingString, $needle); + } +} + diff --git a/src/TwigComponent/tests/Unit/TwigPreLexerTest.php b/src/TwigComponent/tests/Unit/TwigPreLexerTest.php new file mode 100644 index 00000000000..5231f99ca26 --- /dev/null +++ b/src/TwigComponent/tests/Unit/TwigPreLexerTest.php @@ -0,0 +1,65 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\UX\TwigComponent\Tests\Unit; + +use PHPUnit\Framework\TestCase; +use Symfony\UX\TwigComponent\Twig\TwigPreLexer; + +final class TwigPreLexerTest extends TestCase +{ + /** + * @dataProvider getLexTests + */ + public function testPreLex(string $input, string $expectedOutput): void + { + $lexer = new TwigPreLexer(); + $this->assertSame($expectedOutput, $lexer->preLexComponents($input)); + } + + public function getLexTests(): iterable + { + yield 'simple_component' => [ + '', + "{% component foo %}{% endcomponent %}", + ]; + + yield 'component_with_attributes' => [ + '', + "{% component foo with { bar: 'baz', with_quotes: 'It\'s with quotes' } %}{% endcomponent %}", + ]; + + yield 'component_with_dynamic_attributes' => [ + '', + "{% component foo with { dynamic: dynamicVar, otherDynamic: anotherVar } %}{% endcomponent %}", + ]; + + yield 'component_with_closing_tag' => [ + '', + "{% component foo %}{% endcomponent %}", + ]; + + yield 'component_with_block' => [ + 'Foo', + "{% component foo %}{% block foo_block %}Foo{% endblock %}{% endcomponent %}", + ]; + + yield 'component_with_embedded_component_inside_block' => [ + '', + "{% component foo %}{% block foo_block %}{% component bar %}{% endcomponent %}{% endblock %}{% endcomponent %}", + ]; + + yield 'attribute_with_no_value' => [ + '', + "{% component foo with { bar: true } %}{% endcomponent %}", + ]; + } +}