[ Index ]

PHP Cross Reference of phpBB-3.3.0-deutsch

title

Body

[close]

/vendor/twig/twig/src/ -> Lexer.php (source)

   1  <?php
   2  
   3  /*
   4   * This file is part of Twig.
   5   *
   6   * (c) Fabien Potencier
   7   * (c) Armin Ronacher
   8   *
   9   * For the full copyright and license information, please view the LICENSE
  10   * file that was distributed with this source code.
  11   */
  12  
  13  namespace Twig;
  14  
  15  use Twig\Error\SyntaxError;
  16  
  17  /**
  18   * Lexes a template string.
  19   *
  20   * @author Fabien Potencier <fabien@symfony.com>
  21   */
  22  class Lexer
  23  {
  24      private $tokens;
  25      private $code;
  26      private $cursor;
  27      private $lineno;
  28      private $end;
  29      private $state;
  30      private $states;
  31      private $brackets;
  32      private $env;
  33      private $source;
  34      private $options;
  35      private $regexes;
  36      private $position;
  37      private $positions;
  38      private $currentVarBlockLine;
  39  
  40      const STATE_DATA = 0;
  41      const STATE_BLOCK = 1;
  42      const STATE_VAR = 2;
  43      const STATE_STRING = 3;
  44      const STATE_INTERPOLATION = 4;
  45  
  46      const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  47      const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  48      const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  49      const REGEX_DQ_STRING_DELIM = '/"/A';
  50      const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  51      const PUNCTUATION = '()[]{}?:.,|';
  52  
  53      public function __construct(Environment $env, array $options = [])
  54      {
  55          $this->env = $env;
  56  
  57          $this->options = array_merge([
  58              'tag_comment' => ['{#', '#}'],
  59              'tag_block' => ['{%', '%}'],
  60              'tag_variable' => ['{{', '}}'],
  61              'whitespace_trim' => '-',
  62              'whitespace_line_trim' => '~',
  63              'whitespace_line_chars' => ' \t\0\x0B',
  64              'interpolation' => ['#{', '}'],
  65          ], $options);
  66  
  67          // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  68          $this->regexes = [
  69              // }}
  70              'lex_var' => '{
  71                  \s*
  72                  (?:'.
  73                      preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
  74                      '|'.
  75                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
  76                      '|'.
  77                      preg_quote($this->options['tag_variable'][1], '#'). // }}
  78                  ')
  79              }Ax',
  80  
  81              // %}
  82              'lex_block' => '{
  83                  \s*
  84                  (?:'.
  85                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
  86                      '|'.
  87                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  88                      '|'.
  89                      preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
  90                  ')
  91              }Ax',
  92  
  93              // {% endverbatim %}
  94              'lex_raw_data' => '{'.
  95                  preg_quote($this->options['tag_block'][0], '#'). // {%
  96                  '('.
  97                      $this->options['whitespace_trim']. // -
  98                      '|'.
  99                      $this->options['whitespace_line_trim']. // ~
 100                  ')?\s*endverbatim\s*'.
 101                  '(?:'.
 102                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
 103                      '|'.
 104                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 105                      '|'.
 106                      preg_quote($this->options['tag_block'][1], '#'). // %}
 107                  ')
 108              }sx',
 109  
 110              'operator' => $this->getOperatorRegex(),
 111  
 112              // #}
 113              'lex_comment' => '{
 114                  (?:'.
 115                      preg_quote($this->options['whitespace_trim']).preg_quote($this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
 116                      '|'.
 117                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
 118                      '|'.
 119                      preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
 120                  ')
 121              }sx',
 122  
 123              // verbatim %}
 124              'lex_block_raw' => '{
 125                  \s*verbatim\s*
 126                  (?:'.
 127                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
 128                      '|'.
 129                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 130                      '|'.
 131                      preg_quote($this->options['tag_block'][1], '#'). // %}
 132                  ')
 133              }Asx',
 134  
 135              'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
 136  
 137              // {{ or {% or {#
 138              'lex_tokens_start' => '{
 139                  ('.
 140                      preg_quote($this->options['tag_variable'][0], '#'). // {{
 141                      '|'.
 142                      preg_quote($this->options['tag_block'][0], '#'). // {%
 143                      '|'.
 144                      preg_quote($this->options['tag_comment'][0], '#'). // {#
 145                  ')('.
 146                      preg_quote($this->options['whitespace_trim'], '#'). // -
 147                      '|'.
 148                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
 149                  ')?
 150              }sx',
 151              'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
 152              'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
 153          ];
 154      }
 155  
 156      public function tokenize(Source $source)
 157      {
 158          $this->source = $source;
 159          $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
 160          $this->cursor = 0;
 161          $this->lineno = 1;
 162          $this->end = \strlen($this->code);
 163          $this->tokens = [];
 164          $this->state = self::STATE_DATA;
 165          $this->states = [];
 166          $this->brackets = [];
 167          $this->position = -1;
 168  
 169          // find all token starts in one go
 170          preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
 171          $this->positions = $matches;
 172  
 173          while ($this->cursor < $this->end) {
 174              // dispatch to the lexing functions depending
 175              // on the current state
 176              switch ($this->state) {
 177                  case self::STATE_DATA:
 178                      $this->lexData();
 179                      break;
 180  
 181                  case self::STATE_BLOCK:
 182                      $this->lexBlock();
 183                      break;
 184  
 185                  case self::STATE_VAR:
 186                      $this->lexVar();
 187                      break;
 188  
 189                  case self::STATE_STRING:
 190                      $this->lexString();
 191                      break;
 192  
 193                  case self::STATE_INTERPOLATION:
 194                      $this->lexInterpolation();
 195                      break;
 196              }
 197          }
 198  
 199          $this->pushToken(/* Token::EOF_TYPE */ -1);
 200  
 201          if (!empty($this->brackets)) {
 202              list($expect, $lineno) = array_pop($this->brackets);
 203              throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 204          }
 205  
 206          return new TokenStream($this->tokens, $this->source);
 207      }
 208  
 209      private function lexData()
 210      {
 211          // if no matches are left we return the rest of the template as simple text token
 212          if ($this->position == \count($this->positions[0]) - 1) {
 213              $this->pushToken(/* Token::TEXT_TYPE */ 0, substr($this->code, $this->cursor));
 214              $this->cursor = $this->end;
 215  
 216              return;
 217          }
 218  
 219          // Find the first token after the current cursor
 220          $position = $this->positions[0][++$this->position];
 221          while ($position[1] < $this->cursor) {
 222              if ($this->position == \count($this->positions[0]) - 1) {
 223                  return;
 224              }
 225              $position = $this->positions[0][++$this->position];
 226          }
 227  
 228          // push the template text first
 229          $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
 230  
 231          // trim?
 232          if (isset($this->positions[2][$this->position][0])) {
 233              if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
 234                  // whitespace_trim detected ({%-, {{- or {#-)
 235                  $text = rtrim($text);
 236              } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
 237                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 238                  // don't trim \r and \n
 239                  $text = rtrim($text, " \t\0\x0B");
 240              }
 241          }
 242          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
 243          $this->moveCursor($textContent.$position[0]);
 244  
 245          switch ($this->positions[1][$this->position][0]) {
 246              case $this->options['tag_comment'][0]:
 247                  $this->lexComment();
 248                  break;
 249  
 250              case $this->options['tag_block'][0]:
 251                  // raw data?
 252                  if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
 253                      $this->moveCursor($match[0]);
 254                      $this->lexRawData();
 255                  // {% line \d+ %}
 256                  } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
 257                      $this->moveCursor($match[0]);
 258                      $this->lineno = (int) $match[1];
 259                  } else {
 260                      $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
 261                      $this->pushState(self::STATE_BLOCK);
 262                      $this->currentVarBlockLine = $this->lineno;
 263                  }
 264                  break;
 265  
 266              case $this->options['tag_variable'][0]:
 267                  $this->pushToken(/* Token::VAR_START_TYPE */ 2);
 268                  $this->pushState(self::STATE_VAR);
 269                  $this->currentVarBlockLine = $this->lineno;
 270                  break;
 271          }
 272      }
 273  
 274      private function lexBlock()
 275      {
 276          if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
 277              $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
 278              $this->moveCursor($match[0]);
 279              $this->popState();
 280          } else {
 281              $this->lexExpression();
 282          }
 283      }
 284  
 285      private function lexVar()
 286      {
 287          if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
 288              $this->pushToken(/* Token::VAR_END_TYPE */ 4);
 289              $this->moveCursor($match[0]);
 290              $this->popState();
 291          } else {
 292              $this->lexExpression();
 293          }
 294      }
 295  
 296      private function lexExpression()
 297      {
 298          // whitespace
 299          if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
 300              $this->moveCursor($match[0]);
 301  
 302              if ($this->cursor >= $this->end) {
 303                  throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
 304              }
 305          }
 306  
 307          // arrow function
 308          if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor + 1]) {
 309              $this->pushToken(Token::ARROW_TYPE, '=>');
 310              $this->moveCursor('=>');
 311          }
 312          // operators
 313          elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
 314              $this->pushToken(/* Token::OPERATOR_TYPE */ 8, preg_replace('/\s+/', ' ', $match[0]));
 315              $this->moveCursor($match[0]);
 316          }
 317          // names
 318          elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
 319              $this->pushToken(/* Token::NAME_TYPE */ 5, $match[0]);
 320              $this->moveCursor($match[0]);
 321          }
 322          // numbers
 323          elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
 324              $number = (float) $match[0];  // floats
 325              if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
 326                  $number = (int) $match[0]; // integers lower than the maximum
 327              }
 328              $this->pushToken(/* Token::NUMBER_TYPE */ 6, $number);
 329              $this->moveCursor($match[0]);
 330          }
 331          // punctuation
 332          elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
 333              // opening bracket
 334              if (false !== strpos('([{', $this->code[$this->cursor])) {
 335                  $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
 336              }
 337              // closing bracket
 338              elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
 339                  if (empty($this->brackets)) {
 340                      throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 341                  }
 342  
 343                  list($expect, $lineno) = array_pop($this->brackets);
 344                  if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
 345                      throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 346                  }
 347              }
 348  
 349              $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9, $this->code[$this->cursor]);
 350              ++$this->cursor;
 351          }
 352          // strings
 353          elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
 354              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
 355              $this->moveCursor($match[0]);
 356          }
 357          // opening double quoted string
 358          elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 359              $this->brackets[] = ['"', $this->lineno];
 360              $this->pushState(self::STATE_STRING);
 361              $this->moveCursor($match[0]);
 362          }
 363          // unlexable
 364          else {
 365              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 366          }
 367      }
 368  
 369      private function lexRawData()
 370      {
 371          if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
 372              throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
 373          }
 374  
 375          $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
 376          $this->moveCursor($text.$match[0][0]);
 377  
 378          // trim?
 379          if (isset($match[1][0])) {
 380              if ($this->options['whitespace_trim'] === $match[1][0]) {
 381                  // whitespace_trim detected ({%-, {{- or {#-)
 382                  $text = rtrim($text);
 383              } else {
 384                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 385                  // don't trim \r and \n
 386                  $text = rtrim($text, " \t\0\x0B");
 387              }
 388          }
 389  
 390          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
 391      }
 392  
 393      private function lexComment()
 394      {
 395          if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
 396              throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
 397          }
 398  
 399          $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
 400      }
 401  
 402      private function lexString()
 403      {
 404          if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
 405              $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
 406              $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
 407              $this->moveCursor($match[0]);
 408              $this->pushState(self::STATE_INTERPOLATION);
 409          } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
 410              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
 411              $this->moveCursor($match[0]);
 412          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 413              list($expect, $lineno) = array_pop($this->brackets);
 414              if ('"' != $this->code[$this->cursor]) {
 415                  throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 416              }
 417  
 418              $this->popState();
 419              ++$this->cursor;
 420          } else {
 421              // unlexable
 422              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 423          }
 424      }
 425  
 426      private function lexInterpolation()
 427      {
 428          $bracket = end($this->brackets);
 429          if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
 430              array_pop($this->brackets);
 431              $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
 432              $this->moveCursor($match[0]);
 433              $this->popState();
 434          } else {
 435              $this->lexExpression();
 436          }
 437      }
 438  
 439      private function pushToken($type, $value = '')
 440      {
 441          // do not push empty text tokens
 442          if (/* Token::TEXT_TYPE */ 0 === $type && '' === $value) {
 443              return;
 444          }
 445  
 446          $this->tokens[] = new Token($type, $value, $this->lineno);
 447      }
 448  
 449      private function moveCursor($text)
 450      {
 451          $this->cursor += \strlen($text);
 452          $this->lineno += substr_count($text, "\n");
 453      }
 454  
 455      private function getOperatorRegex()
 456      {
 457          $operators = array_merge(
 458              ['='],
 459              array_keys($this->env->getUnaryOperators()),
 460              array_keys($this->env->getBinaryOperators())
 461          );
 462  
 463          $operators = array_combine($operators, array_map('strlen', $operators));
 464          arsort($operators);
 465  
 466          $regex = [];
 467          foreach ($operators as $operator => $length) {
 468              // an operator that ends with a character must be followed by
 469              // a whitespace or a parenthesis
 470              if (ctype_alpha($operator[$length - 1])) {
 471                  $r = preg_quote($operator, '/').'(?=[\s()])';
 472              } else {
 473                  $r = preg_quote($operator, '/');
 474              }
 475  
 476              // an operator with a space can be any amount of whitespaces
 477              $r = preg_replace('/\s+/', '\s+', $r);
 478  
 479              $regex[] = $r;
 480          }
 481  
 482          return '/'.implode('|', $regex).'/A';
 483      }
 484  
 485      private function pushState($state)
 486      {
 487          $this->states[] = $this->state;
 488          $this->state = $state;
 489      }
 490  
 491      private function popState()
 492      {
 493          if (0 === \count($this->states)) {
 494              throw new \LogicException('Cannot pop state without a previous state.');
 495          }
 496  
 497          $this->state = array_pop($this->states);
 498      }
 499  }
 500  
 501  class_alias('Twig\Lexer', 'Twig_Lexer');


Generated: Tue Apr 7 19:44:41 2020 Cross-referenced by PHPXref 0.7.1