vendor/twig/twig/src/Lexer.php line 108

Open in your IDE?
  1. <?php
  2. /*
  3.  * This file is part of Twig.
  4.  *
  5.  * (c) Fabien Potencier
  6.  * (c) Armin Ronacher
  7.  *
  8.  * For the full copyright and license information, please view the LICENSE
  9.  * file that was distributed with this source code.
  10.  */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14.  * @author Fabien Potencier <fabien@symfony.com>
  15.  */
  16. class Lexer
  17. {
  18.     private $tokens;
  19.     private $code;
  20.     private $cursor;
  21.     private $lineno;
  22.     private $end;
  23.     private $state;
  24.     private $states;
  25.     private $brackets;
  26.     private $env;
  27.     private $source;
  28.     private $options;
  29.     private $regexes;
  30.     private $position;
  31.     private $positions;
  32.     private $currentVarBlockLine;
  33.     public const STATE_DATA 0;
  34.     public const STATE_BLOCK 1;
  35.     public const STATE_VAR 2;
  36.     public const STATE_STRING 3;
  37.     public const STATE_INTERPOLATION 4;
  38.     public const REGEX_NAME '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  39.     public const REGEX_NUMBER '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  40.     public const REGEX_STRING '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  41.     public const REGEX_DQ_STRING_DELIM '/"/A';
  42.     public const REGEX_DQ_STRING_PART '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  43.     public const PUNCTUATION '()[]{}?:.,|';
  44.     public function __construct(Environment $env, array $options = [])
  45.     {
  46.         $this->env $env;
  47.         $this->options array_merge([
  48.             'tag_comment' => ['{#''#}'],
  49.             'tag_block' => ['{%''%}'],
  50.             'tag_variable' => ['{{''}}'],
  51.             'whitespace_trim' => '-',
  52.             'whitespace_line_trim' => '~',
  53.             'whitespace_line_chars' => ' \t\0\x0B',
  54.             'interpolation' => ['#{''}'],
  55.         ], $options);
  56.         // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  57.         $this->regexes = [
  58.             // }}
  59.             'lex_var' => '{
  60.                 \s*
  61.                 (?:'.
  62.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'// -}}\s*
  63.                     '|'.
  64.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~}}[ \t\0\x0B]*
  65.                     '|'.
  66.                     preg_quote($this->options['tag_variable'][1], '#'). // }}
  67.                 ')
  68.             }Ax',
  69.             // %}
  70.             'lex_block' => '{
  71.                 \s*
  72.                 (?:'.
  73.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'// -%}\s*\n?
  74.                     '|'.
  75.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  76.                     '|'.
  77.                     preg_quote($this->options['tag_block'][1], '#').'\n?'// %}\n?
  78.                 ')
  79.             }Ax',
  80.             // {% endverbatim %}
  81.             'lex_raw_data' => '{'.
  82.                 preg_quote($this->options['tag_block'][0], '#'). // {%
  83.                 '('.
  84.                     $this->options['whitespace_trim']. // -
  85.                     '|'.
  86.                     $this->options['whitespace_line_trim']. // ~
  87.                 ')?\s*endverbatim\s*'.
  88.                 '(?:'.
  89.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}
  90.                     '|'.
  91.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  92.                     '|'.
  93.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  94.                 ')
  95.             }sx',
  96.             'operator' => $this->getOperatorRegex(),
  97.             // #}
  98.             'lex_comment' => '{
  99.                 (?:'.
  100.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'// -#}\s*\n?
  101.                     '|'.
  102.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~#}[ \t\0\x0B]*
  103.                     '|'.
  104.                     preg_quote($this->options['tag_comment'][1], '#').'\n?'// #}\n?
  105.                 ')
  106.             }sx',
  107.             // verbatim %}
  108.             'lex_block_raw' => '{
  109.                 \s*verbatim\s*
  110.                 (?:'.
  111.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}\s*
  112.                     '|'.
  113.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  114.                     '|'.
  115.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  116.                 ')
  117.             }Asx',
  118.             'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
  119.             // {{ or {% or {#
  120.             'lex_tokens_start' => '{
  121.                 ('.
  122.                     preg_quote($this->options['tag_variable'][0], '#'). // {{
  123.                     '|'.
  124.                     preg_quote($this->options['tag_block'][0], '#'). // {%
  125.                     '|'.
  126.                     preg_quote($this->options['tag_comment'][0], '#'). // {#
  127.                 ')('.
  128.                     preg_quote($this->options['whitespace_trim'], '#'). // -
  129.                     '|'.
  130.                     preg_quote($this->options['whitespace_line_trim'], '#'). // ~
  131.                 ')?
  132.             }sx',
  133.             'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
  134.             'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
  135.         ];
  136.     }
  137.     public function tokenize(Source $source): TokenStream
  138.     {
  139.         $this->source $source;
  140.         $this->code str_replace(["\r\n""\r"], "\n"$source->getCode());
  141.         $this->cursor 0;
  142.         $this->lineno 1;
  143.         $this->end \strlen($this->code);
  144.         $this->tokens = [];
  145.         $this->state self::STATE_DATA;
  146.         $this->states = [];
  147.         $this->brackets = [];
  148.         $this->position = -1;
  149.         // find all token starts in one go
  150.         preg_match_all($this->regexes['lex_tokens_start'], $this->code$matches\PREG_OFFSET_CAPTURE);
  151.         $this->positions $matches;
  152.         while ($this->cursor $this->end) {
  153.             // dispatch to the lexing functions depending
  154.             // on the current state
  155.             switch ($this->state) {
  156.                 case self::STATE_DATA:
  157.                     $this->lexData();
  158.                     break;
  159.                 case self::STATE_BLOCK:
  160.                     $this->lexBlock();
  161.                     break;
  162.                 case self::STATE_VAR:
  163.                     $this->lexVar();
  164.                     break;
  165.                 case self::STATE_STRING:
  166.                     $this->lexString();
  167.                     break;
  168.                 case self::STATE_INTERPOLATION:
  169.                     $this->lexInterpolation();
  170.                     break;
  171.             }
  172.         }
  173.         $this->pushToken(/* Token::EOF_TYPE */ -1);
  174.         if (!empty($this->brackets)) {
  175.             list($expect$lineno) = array_pop($this->brackets);
  176.             throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  177.         }
  178.         return new TokenStream($this->tokens$this->source);
  179.     }
  180.     private function lexData(): void
  181.     {
  182.         // if no matches are left we return the rest of the template as simple text token
  183.         if ($this->position == \count($this->positions[0]) - 1) {
  184.             $this->pushToken(/* Token::TEXT_TYPE */ 0substr($this->code$this->cursor));
  185.             $this->cursor $this->end;
  186.             return;
  187.         }
  188.         // Find the first token after the current cursor
  189.         $position $this->positions[0][++$this->position];
  190.         while ($position[1] < $this->cursor) {
  191.             if ($this->position == \count($this->positions[0]) - 1) {
  192.                 return;
  193.             }
  194.             $position $this->positions[0][++$this->position];
  195.         }
  196.         // push the template text first
  197.         $text $textContent substr($this->code$this->cursor$position[1] - $this->cursor);
  198.         // trim?
  199.         if (isset($this->positions[2][$this->position][0])) {
  200.             if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
  201.                 // whitespace_trim detected ({%-, {{- or {#-)
  202.                 $text rtrim($text);
  203.             } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
  204.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  205.                 // don't trim \r and \n
  206.                 $text rtrim($text" \t\0\x0B");
  207.             }
  208.         }
  209.         $this->pushToken(/* Token::TEXT_TYPE */ 0$text);
  210.         $this->moveCursor($textContent.$position[0]);
  211.         switch ($this->positions[1][$this->position][0]) {
  212.             case $this->options['tag_comment'][0]:
  213.                 $this->lexComment();
  214.                 break;
  215.             case $this->options['tag_block'][0]:
  216.                 // raw data?
  217.                 if (preg_match($this->regexes['lex_block_raw'], $this->code$match0$this->cursor)) {
  218.                     $this->moveCursor($match[0]);
  219.                     $this->lexRawData();
  220.                 // {% line \d+ %}
  221.                 } elseif (preg_match($this->regexes['lex_block_line'], $this->code$match0$this->cursor)) {
  222.                     $this->moveCursor($match[0]);
  223.                     $this->lineno = (int) $match[1];
  224.                 } else {
  225.                     $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
  226.                     $this->pushState(self::STATE_BLOCK);
  227.                     $this->currentVarBlockLine $this->lineno;
  228.                 }
  229.                 break;
  230.             case $this->options['tag_variable'][0]:
  231.                 $this->pushToken(/* Token::VAR_START_TYPE */ 2);
  232.                 $this->pushState(self::STATE_VAR);
  233.                 $this->currentVarBlockLine $this->lineno;
  234.                 break;
  235.         }
  236.     }
  237.     private function lexBlock(): void
  238.     {
  239.         if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code$match0$this->cursor)) {
  240.             $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
  241.             $this->moveCursor($match[0]);
  242.             $this->popState();
  243.         } else {
  244.             $this->lexExpression();
  245.         }
  246.     }
  247.     private function lexVar(): void
  248.     {
  249.         if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code$match0$this->cursor)) {
  250.             $this->pushToken(/* Token::VAR_END_TYPE */ 4);
  251.             $this->moveCursor($match[0]);
  252.             $this->popState();
  253.         } else {
  254.             $this->lexExpression();
  255.         }
  256.     }
  257.     private function lexExpression(): void
  258.     {
  259.         // whitespace
  260.         if (preg_match('/\s+/A'$this->code$match0$this->cursor)) {
  261.             $this->moveCursor($match[0]);
  262.             if ($this->cursor >= $this->end) {
  263.                 throw new SyntaxError(sprintf('Unclosed "%s".'self::STATE_BLOCK === $this->state 'block' 'variable'), $this->currentVarBlockLine$this->source);
  264.             }
  265.         }
  266.         // arrow function
  267.         if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor 1]) {
  268.             $this->pushToken(Token::ARROW_TYPE'=>');
  269.             $this->moveCursor('=>');
  270.         }
  271.         // operators
  272.         elseif (preg_match($this->regexes['operator'], $this->code$match0$this->cursor)) {
  273.             $this->pushToken(/* Token::OPERATOR_TYPE */ 8preg_replace('/\s+/'' '$match[0]));
  274.             $this->moveCursor($match[0]);
  275.         }
  276.         // names
  277.         elseif (preg_match(self::REGEX_NAME$this->code$match0$this->cursor)) {
  278.             $this->pushToken(/* Token::NAME_TYPE */ 5$match[0]);
  279.             $this->moveCursor($match[0]);
  280.         }
  281.         // numbers
  282.         elseif (preg_match(self::REGEX_NUMBER$this->code$match0$this->cursor)) {
  283.             $number = (float) $match[0];  // floats
  284.             if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
  285.                 $number = (int) $match[0]; // integers lower than the maximum
  286.             }
  287.             $this->pushToken(/* Token::NUMBER_TYPE */ 6$number);
  288.             $this->moveCursor($match[0]);
  289.         }
  290.         // punctuation
  291.         elseif (false !== strpos(self::PUNCTUATION$this->code[$this->cursor])) {
  292.             // opening bracket
  293.             if (false !== strpos('([{'$this->code[$this->cursor])) {
  294.                 $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
  295.             }
  296.             // closing bracket
  297.             elseif (false !== strpos(')]}'$this->code[$this->cursor])) {
  298.                 if (empty($this->brackets)) {
  299.                     throw new SyntaxError(sprintf('Unexpected "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  300.                 }
  301.                 list($expect$lineno) = array_pop($this->brackets);
  302.                 if ($this->code[$this->cursor] != strtr($expect'([{'')]}')) {
  303.                     throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  304.                 }
  305.             }
  306.             $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9$this->code[$this->cursor]);
  307.             ++$this->cursor;
  308.         }
  309.         // strings
  310.         elseif (preg_match(self::REGEX_STRING$this->code$match0$this->cursor)) {
  311.             $this->pushToken(/* Token::STRING_TYPE */ 7stripcslashes(substr($match[0], 1, -1)));
  312.             $this->moveCursor($match[0]);
  313.         }
  314.         // opening double quoted string
  315.         elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  316.             $this->brackets[] = ['"'$this->lineno];
  317.             $this->pushState(self::STATE_STRING);
  318.             $this->moveCursor($match[0]);
  319.         }
  320.         // unlexable
  321.         else {
  322.             throw new SyntaxError(sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  323.         }
  324.     }
  325.     private function lexRawData(): void
  326.     {
  327.         if (!preg_match($this->regexes['lex_raw_data'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  328.             throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.'$this->lineno$this->source);
  329.         }
  330.         $text substr($this->code$this->cursor$match[0][1] - $this->cursor);
  331.         $this->moveCursor($text.$match[0][0]);
  332.         // trim?
  333.         if (isset($match[1][0])) {
  334.             if ($this->options['whitespace_trim'] === $match[1][0]) {
  335.                 // whitespace_trim detected ({%-, {{- or {#-)
  336.                 $text rtrim($text);
  337.             } else {
  338.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  339.                 // don't trim \r and \n
  340.                 $text rtrim($text" \t\0\x0B");
  341.             }
  342.         }
  343.         $this->pushToken(/* Token::TEXT_TYPE */ 0$text);
  344.     }
  345.     private function lexComment(): void
  346.     {
  347.         if (!preg_match($this->regexes['lex_comment'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  348.             throw new SyntaxError('Unclosed comment.'$this->lineno$this->source);
  349.         }
  350.         $this->moveCursor(substr($this->code$this->cursor$match[0][1] - $this->cursor).$match[0][0]);
  351.     }
  352.     private function lexString(): void
  353.     {
  354.         if (preg_match($this->regexes['interpolation_start'], $this->code$match0$this->cursor)) {
  355.             $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
  356.             $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
  357.             $this->moveCursor($match[0]);
  358.             $this->pushState(self::STATE_INTERPOLATION);
  359.         } elseif (preg_match(self::REGEX_DQ_STRING_PART$this->code$match0$this->cursor) && \strlen($match[0]) > 0) {
  360.             $this->pushToken(/* Token::STRING_TYPE */ 7stripcslashes($match[0]));
  361.             $this->moveCursor($match[0]);
  362.         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  363.             list($expect$lineno) = array_pop($this->brackets);
  364.             if ('"' != $this->code[$this->cursor]) {
  365.                 throw new SyntaxError(sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  366.             }
  367.             $this->popState();
  368.             ++$this->cursor;
  369.         } else {
  370.             // unlexable
  371.             throw new SyntaxError(sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  372.         }
  373.     }
  374.     private function lexInterpolation(): void
  375.     {
  376.         $bracket end($this->brackets);
  377.         if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code$match0$this->cursor)) {
  378.             array_pop($this->brackets);
  379.             $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
  380.             $this->moveCursor($match[0]);
  381.             $this->popState();
  382.         } else {
  383.             $this->lexExpression();
  384.         }
  385.     }
  386.     private function pushToken($type$value ''): void
  387.     {
  388.         // do not push empty text tokens
  389.         if (/* Token::TEXT_TYPE */ === $type && '' === $value) {
  390.             return;
  391.         }
  392.         $this->tokens[] = new Token($type$value$this->lineno);
  393.     }
  394.     private function moveCursor($text): void
  395.     {
  396.         $this->cursor += \strlen($text);
  397.         $this->lineno += substr_count($text"\n");
  398.     }
  399.     private function getOperatorRegex(): string
  400.     {
  401.         $operators array_merge(
  402.             ['='],
  403.             array_keys($this->env->getUnaryOperators()),
  404.             array_keys($this->env->getBinaryOperators())
  405.         );
  406.         $operators array_combine($operatorsarray_map('strlen'$operators));
  407.         arsort($operators);
  408.         $regex = [];
  409.         foreach ($operators as $operator => $length) {
  410.             // an operator that ends with a character must be followed by
  411.             // a whitespace, a parenthesis, an opening map [ or sequence {
  412.             $r preg_quote($operator'/');
  413.             if (ctype_alpha($operator[$length 1])) {
  414.                 $r .= '(?=[\s()\[{])';
  415.             }
  416.             // an operator that begins with a character must not have a dot or pipe before
  417.             if (ctype_alpha($operator[0])) {
  418.                 $r '(?<![\.\|])'.$r;
  419.             }
  420.             // an operator with a space can be any amount of whitespaces
  421.             $r preg_replace('/\s+/''\s+'$r);
  422.             $regex[] = $r;
  423.         }
  424.         return '/'.implode('|'$regex).'/A';
  425.     }
  426.     private function pushState($state): void
  427.     {
  428.         $this->states[] = $this->state;
  429.         $this->state $state;
  430.     }
  431.     private function popState(): void
  432.     {
  433.         if (=== \count($this->states)) {
  434.             throw new \LogicException('Cannot pop state without a previous state.');
  435.         }
  436.         $this->state array_pop($this->states);
  437.     }
  438. }