[ Index ]

PHP Cross Reference of phpBB-3.3.14-deutsch

title

Body

[close]

/vendor/twig/twig/src/ -> Lexer.php (source)

   1  <?php
   2  
   3  /*
   4   * This file is part of Twig.
   5   *
   6   * (c) Fabien Potencier
   7   * (c) Armin Ronacher
   8   *
   9   * For the full copyright and license information, please view the LICENSE
  10   * file that was distributed with this source code.
  11   */
  12  
  13  namespace Twig;
  14  
  15  use Twig\Error\SyntaxError;
  16  
  17  /**
  18   * Lexes a template string.
  19   *
  20   * @author Fabien Potencier <fabien@symfony.com>
  21   */
  22  class Lexer
  23  {
  24      private $isInitialized = false; // set to true by initialize() once $regexes has been built
  25  
  26      private $tokens; // Token objects collected by pushToken() during the current tokenize() call
  27      private $code; // template code with "\r\n" and "\r" normalized to "\n"
  28      private $cursor; // current byte offset into $code
  29      private $lineno; // current line number, starts at 1
  30      private $end; // strlen($code)
  31      private $state; // current STATE_* value
  32      private $states; // stack of previous states, see pushState()/popState()
  33      private $brackets; // stack of [opening delimiter, line number] pairs still waiting for their closer
  34      private $env; // Environment that supplies the unary and binary operators
  35      private $source; // Source currently being tokenized
  36      private $options; // delimiters and whitespace-control markers (defaults merged with constructor overrides)
  37      private $regexes; // patterns built by initialize()
  38      private $position; // index into $positions[0] of the last tag opener looked at by lexData(), -1 initially
  39      private $positions; // preg_match_all() result (with offsets) of 'lex_tokens_start' over $code
  40      private $currentVarBlockLine; // line on which the current block/variable tag was opened, reported by "Unclosed" errors
  41  
  42      public const STATE_DATA = 0; // plain template text, handled by lexData()
  43      public const STATE_BLOCK = 1; // inside a block tag, handled by lexBlock()
  44      public const STATE_VAR = 2; // inside a variable tag, handled by lexVar()
  45      public const STATE_STRING = 3; // inside a double-quoted string, handled by lexString()
  46      public const STATE_INTERPOLATION = 4; // inside a string interpolation, handled by lexInterpolation()
  47  
  48      public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A'; // identifier
  49      public const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A'; // integer or decimal; an exponent must carry an explicit sign
  50      public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As'; // complete quoted string; the double-quoted form must not contain "#"
  51      public const REGEX_DQ_STRING_DELIM = '/"/A'; // opening or closing double quote
  52      public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As'; // double-quoted string content up to the next unescaped '"' or "#{"
  53      public const PUNCTUATION = '()[]{}?:.,|'; // characters lexed as single-character punctuation tokens
  55      public function __construct(Environment $env, array $options = [])
  56      {
  57          $this->env = $env;
  58  
  59          $this->options = array_merge([
  60              'tag_comment' => ['{#', '#}'],
  61              'tag_block' => ['{%', '%}'],
  62              'tag_variable' => ['{{', '}}'],
  63              'whitespace_trim' => '-',
  64              'whitespace_line_trim' => '~',
  65              'whitespace_line_chars' => ' \t\0\x0B',
  66              'interpolation' => ['#{', '}'],
  67          ], $options);
  68      }
  69  
  70      private function initialize()
  71      {
  72          if ($this->isInitialized) {
  73              return;
  74          }
  75  
  76          $this->isInitialized = true;
  77  
  78          // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  79          $this->regexes = [
  80              // }}
  81              'lex_var' => '{
  82                  \s*
  83                  (?:'.
  84                      preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
  85                      '|'.
  86                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
  87                      '|'.
  88                      preg_quote($this->options['tag_variable'][1], '#'). // }}
  89                  ')
  90              }Ax',
  91  
  92              // %}
  93              'lex_block' => '{
  94                  \s*
  95                  (?:'.
  96                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
  97                      '|'.
  98                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  99                      '|'.
 100                      preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
 101                  ')
 102              }Ax',
 103  
 104              // {% endverbatim %}
 105              'lex_raw_data' => '{'.
 106                  preg_quote($this->options['tag_block'][0], '#'). // {%
 107                  '('.
 108                      $this->options['whitespace_trim']. // -
 109                      '|'.
 110                      $this->options['whitespace_line_trim']. // ~
 111                  ')?\s*endverbatim\s*'.
 112                  '(?:'.
 113                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
 114                      '|'.
 115                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 116                      '|'.
 117                      preg_quote($this->options['tag_block'][1], '#'). // %}
 118                  ')
 119              }sx',
 120  
 121              'operator' => $this->getOperatorRegex(),
 122  
 123              // #}
 124              'lex_comment' => '{
 125                  (?:'.
 126                      preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
 127                      '|'.
 128                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
 129                      '|'.
 130                      preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
 131                  ')
 132              }sx',
 133  
 134              // verbatim %}
 135              'lex_block_raw' => '{
 136                  \s*verbatim\s*
 137                  (?:'.
 138                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
 139                      '|'.
 140                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 141                      '|'.
 142                      preg_quote($this->options['tag_block'][1], '#'). // %}
 143                  ')
 144              }Asx',
 145  
 146              'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
 147  
 148              // {{ or {% or {#
 149              'lex_tokens_start' => '{
 150                  ('.
 151                      preg_quote($this->options['tag_variable'][0], '#'). // {{
 152                      '|'.
 153                      preg_quote($this->options['tag_block'][0], '#'). // {%
 154                      '|'.
 155                      preg_quote($this->options['tag_comment'][0], '#'). // {#
 156                  ')('.
 157                      preg_quote($this->options['whitespace_trim'], '#'). // -
 158                      '|'.
 159                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
 160                  ')?
 161              }sx',
 162              'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
 163              'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
 164          ];
 165      }
 166  
 167      public function tokenize(Source $source)
 168      {
 169          $this->initialize();
 170  
 171          $this->source = $source;
 172          $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
 173          $this->cursor = 0;
 174          $this->lineno = 1;
 175          $this->end = \strlen($this->code);
 176          $this->tokens = [];
 177          $this->state = self::STATE_DATA;
 178          $this->states = [];
 179          $this->brackets = [];
 180          $this->position = -1;
 181  
 182          // find all token starts in one go
 183          preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, \PREG_OFFSET_CAPTURE);
 184          $this->positions = $matches;
 185  
 186          while ($this->cursor < $this->end) {
 187              // dispatch to the lexing functions depending
 188              // on the current state
 189              switch ($this->state) {
 190                  case self::STATE_DATA:
 191                      $this->lexData();
 192                      break;
 193  
 194                  case self::STATE_BLOCK:
 195                      $this->lexBlock();
 196                      break;
 197  
 198                  case self::STATE_VAR:
 199                      $this->lexVar();
 200                      break;
 201  
 202                  case self::STATE_STRING:
 203                      $this->lexString();
 204                      break;
 205  
 206                  case self::STATE_INTERPOLATION:
 207                      $this->lexInterpolation();
 208                      break;
 209              }
 210          }
 211  
 212          $this->pushToken(/* Token::EOF_TYPE */ -1);
 213  
 214          if (!empty($this->brackets)) {
 215              list($expect, $lineno) = array_pop($this->brackets);
 216              throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 217          }
 218  
 219          return new TokenStream($this->tokens, $this->source);
 220      }
 221  
 222      private function lexData()
 223      {
 224          // if no matches are left we return the rest of the template as simple text token
 225          if ($this->position == \count($this->positions[0]) - 1) {
 226              $this->pushToken(/* Token::TEXT_TYPE */ 0, substr($this->code, $this->cursor));
 227              $this->cursor = $this->end;
 228  
 229              return;
 230          }
 231  
 232          // Find the first token after the current cursor
 233          $position = $this->positions[0][++$this->position];
 234          while ($position[1] < $this->cursor) {
 235              if ($this->position == \count($this->positions[0]) - 1) {
 236                  return;
 237              }
 238              $position = $this->positions[0][++$this->position];
 239          }
 240  
 241          // push the template text first
 242          $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
 243  
 244          // trim?
 245          if (isset($this->positions[2][$this->position][0])) {
 246              if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
 247                  // whitespace_trim detected ({%-, {{- or {#-)
 248                  $text = rtrim($text);
 249              } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
 250                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 251                  // don't trim \r and \n
 252                  $text = rtrim($text, " \t\0\x0B");
 253              }
 254          }
 255          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
 256          $this->moveCursor($textContent.$position[0]);
 257  
 258          switch ($this->positions[1][$this->position][0]) {
 259              case $this->options['tag_comment'][0]:
 260                  $this->lexComment();
 261                  break;
 262  
 263              case $this->options['tag_block'][0]:
 264                  // raw data?
 265                  if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
 266                      $this->moveCursor($match[0]);
 267                      $this->lexRawData();
 268                  // {% line \d+ %}
 269                  } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
 270                      $this->moveCursor($match[0]);
 271                      $this->lineno = (int) $match[1];
 272                  } else {
 273                      $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
 274                      $this->pushState(self::STATE_BLOCK);
 275                      $this->currentVarBlockLine = $this->lineno;
 276                  }
 277                  break;
 278  
 279              case $this->options['tag_variable'][0]:
 280                  $this->pushToken(/* Token::VAR_START_TYPE */ 2);
 281                  $this->pushState(self::STATE_VAR);
 282                  $this->currentVarBlockLine = $this->lineno;
 283                  break;
 284          }
 285      }
 286  
 287      private function lexBlock()
 288      {
 289          if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
 290              $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
 291              $this->moveCursor($match[0]);
 292              $this->popState();
 293          } else {
 294              $this->lexExpression();
 295          }
 296      }
 297  
 298      private function lexVar()
 299      {
 300          if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
 301              $this->pushToken(/* Token::VAR_END_TYPE */ 4);
 302              $this->moveCursor($match[0]);
 303              $this->popState();
 304          } else {
 305              $this->lexExpression();
 306          }
 307      }
 308  
 309      private function lexExpression()
 310      {
 311          // whitespace
 312          if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
 313              $this->moveCursor($match[0]);
 314  
 315              if ($this->cursor >= $this->end) {
 316                  throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
 317              }
 318          }
 319  
 320          // arrow function
 321          if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor + 1]) {
 322              $this->pushToken(Token::ARROW_TYPE, '=>');
 323              $this->moveCursor('=>');
 324          }
 325          // operators
 326          elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
 327              $this->pushToken(/* Token::OPERATOR_TYPE */ 8, preg_replace('/\s+/', ' ', $match[0]));
 328              $this->moveCursor($match[0]);
 329          }
 330          // names
 331          elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
 332              $this->pushToken(/* Token::NAME_TYPE */ 5, $match[0]);
 333              $this->moveCursor($match[0]);
 334          }
 335          // numbers
 336          elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
 337              $number = (float) $match[0];  // floats
 338              if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
 339                  $number = (int) $match[0]; // integers lower than the maximum
 340              }
 341              $this->pushToken(/* Token::NUMBER_TYPE */ 6, $number);
 342              $this->moveCursor($match[0]);
 343          }
 344          // punctuation
 345          elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
 346              // opening bracket
 347              if (false !== strpos('([{', $this->code[$this->cursor])) {
 348                  $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
 349              }
 350              // closing bracket
 351              elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
 352                  if (empty($this->brackets)) {
 353                      throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 354                  }
 355  
 356                  list($expect, $lineno) = array_pop($this->brackets);
 357                  if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
 358                      throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 359                  }
 360              }
 361  
 362              $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9, $this->code[$this->cursor]);
 363              ++$this->cursor;
 364          }
 365          // strings
 366          elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
 367              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
 368              $this->moveCursor($match[0]);
 369          }
 370          // opening double quoted string
 371          elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 372              $this->brackets[] = ['"', $this->lineno];
 373              $this->pushState(self::STATE_STRING);
 374              $this->moveCursor($match[0]);
 375          }
 376          // unlexable
 377          else {
 378              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 379          }
 380      }
 381  
 382      private function lexRawData()
 383      {
 384          if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
 385              throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
 386          }
 387  
 388          $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
 389          $this->moveCursor($text.$match[0][0]);
 390  
 391          // trim?
 392          if (isset($match[1][0])) {
 393              if ($this->options['whitespace_trim'] === $match[1][0]) {
 394                  // whitespace_trim detected ({%-, {{- or {#-)
 395                  $text = rtrim($text);
 396              } else {
 397                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 398                  // don't trim \r and \n
 399                  $text = rtrim($text, " \t\0\x0B");
 400              }
 401          }
 402  
 403          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
 404      }
 405  
 406      private function lexComment()
 407      {
 408          if (!preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
 409              throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
 410          }
 411  
 412          $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
 413      }
 414  
 415      private function lexString()
 416      {
 417          if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
 418              $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
 419              $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
 420              $this->moveCursor($match[0]);
 421              $this->pushState(self::STATE_INTERPOLATION);
 422          } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
 423              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
 424              $this->moveCursor($match[0]);
 425          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 426              list($expect, $lineno) = array_pop($this->brackets);
 427              if ('"' != $this->code[$this->cursor]) {
 428                  throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 429              }
 430  
 431              $this->popState();
 432              ++$this->cursor;
 433          } else {
 434              // unlexable
 435              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 436          }
 437      }
 438  
 439      private function lexInterpolation()
 440      {
 441          $bracket = end($this->brackets);
 442          if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
 443              array_pop($this->brackets);
 444              $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
 445              $this->moveCursor($match[0]);
 446              $this->popState();
 447          } else {
 448              $this->lexExpression();
 449          }
 450      }
 451  
 452      private function pushToken($type, $value = '')
 453      {
 454          // do not push empty text tokens
 455          if (/* Token::TEXT_TYPE */ 0 === $type && '' === $value) {
 456              return;
 457          }
 458  
 459          $this->tokens[] = new Token($type, $value, $this->lineno);
 460      }
 461  
 462      private function moveCursor($text)
 463      {
 464          $this->cursor += \strlen($text);
 465          $this->lineno += substr_count($text, "\n");
 466      }
 467  
 468      private function getOperatorRegex()
 469      {
 470          $operators = array_merge(
 471              ['='],
 472              array_keys($this->env->getUnaryOperators()),
 473              array_keys($this->env->getBinaryOperators())
 474          );
 475  
 476          $operators = array_combine($operators, array_map('strlen', $operators));
 477          arsort($operators);
 478  
 479          $regex = [];
 480          foreach ($operators as $operator => $length) {
 481              // an operator that ends with a character must be followed by
 482              // a whitespace, a parenthesis, an opening map [ or sequence {
 483              $r = preg_quote($operator, '/');
 484              if (ctype_alpha($operator[$length - 1])) {
 485                  $r .= '(?=[\s()\[{])';
 486              }
 487  
 488              // an operator that begins with a character must not have a dot or pipe before
 489              if (ctype_alpha($operator[0])) {
 490                  $r = '(?<![\.\|])'.$r;
 491              }
 492  
 493              // an operator with a space can be any amount of whitespaces
 494              $r = preg_replace('/\s+/', '\s+', $r);
 495  
 496              $regex[] = $r;
 497          }
 498  
 499          return '/'.implode('|', $regex).'/A';
 500      }
 501  
 502      private function pushState($state)
 503      {
 504          $this->states[] = $this->state;
 505          $this->state = $state;
 506      }
 507  
 508      private function popState()
 509      {
 510          if (0 === \count($this->states)) {
 511              throw new \LogicException('Cannot pop state without a previous state.');
 512          }
 513  
 514          $this->state = array_pop($this->states);
 515      }
 516  }
 517  
  518  class_alias('Twig\Lexer', 'Twig_Lexer'); // also makes the class available under the un-namespaced name Twig_Lexer


Generated: Mon Nov 25 19:05:08 2024 Cross-referenced by PHPXref 0.7.1