[ Index ]

PHP Cross Reference of phpBB-3.2.8-deutsch

title

Body

[close]

/vendor/twig/twig/src/ -> Lexer.php (source)

   1  <?php
   2  
   3  /*
   4   * This file is part of Twig.
   5   *
   6   * (c) Fabien Potencier
   7   * (c) Armin Ronacher
   8   *
   9   * For the full copyright and license information, please view the LICENSE
  10   * file that was distributed with this source code.
  11   */
  12  
  13  namespace Twig;
  14  
  15  use Twig\Error\SyntaxError;
  16  
  17  /**
  18   * Lexes a template string.
  19   *
  20   * @author Fabien Potencier <fabien@symfony.com>
  21   */
  22  class Lexer implements \Twig_LexerInterface
  23  {
  24      protected $tokens;
  25      protected $code;
  26      protected $cursor;
  27      protected $lineno;
  28      protected $end;
  29      protected $state;
  30      protected $states;
  31      protected $brackets;
  32      protected $env;
  33      // to be renamed to $name in 2.0 (where it is private)
  34      protected $filename;
  35      protected $options;
  36      protected $regexes;
  37      protected $position;
  38      protected $positions;
  39      protected $currentVarBlockLine;
  40  
  41      private $source;
  42  
  43      const STATE_DATA = 0;
  44      const STATE_BLOCK = 1;
  45      const STATE_VAR = 2;
  46      const STATE_STRING = 3;
  47      const STATE_INTERPOLATION = 4;
  48  
  49      const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  50      const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
  51      const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  52      const REGEX_DQ_STRING_DELIM = '/"/A';
  53      const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  54      const PUNCTUATION = '()[]{}?:.,|';
  55  
  56      public function __construct(Environment $env, array $options = [])
  57      {
  58          $this->env = $env;
  59  
  60          $this->options = array_merge([
  61              'tag_comment' => ['{#', '#}'],
  62              'tag_block' => ['{%', '%}'],
  63              'tag_variable' => ['{{', '}}'],
  64              'whitespace_trim' => '-',
  65              'whitespace_line_trim' => '~',
  66              'whitespace_line_chars' => ' \t\0\x0B',
  67              'interpolation' => ['#{', '}'],
  68          ], $options);
  69  
  70          // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  71          $this->regexes = [
  72              // }}
  73              'lex_var' => '{
  74                  \s*
  75                  (?:'.
  76                      preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
  77                      '|'.
  78                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
  79                      '|'.
  80                      preg_quote($this->options['tag_variable'][1], '#'). // }}
  81                  ')
  82              }Ax',
  83  
  84              // %}
  85              'lex_block' => '{
  86                  \s*
  87                  (?:'.
  88                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
  89                      '|'.
  90                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  91                      '|'.
  92                      preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
  93                  ')
  94              }Ax',
  95  
  96              // {% endverbatim %}
  97              'lex_raw_data' => '{'.
  98                  preg_quote($this->options['tag_block'][0], '#'). // {%
  99                  '('.
 100                      $this->options['whitespace_trim']. // -
 101                      '|'.
 102                      $this->options['whitespace_line_trim']. // ~
 103                  ')?\s*'.
 104                  '(?:end%s)'. // endraw or endverbatim
 105                  '\s*'.
 106                  '(?:'.
 107                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
 108                      '|'.
 109                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 110                      '|'.
 111                      preg_quote($this->options['tag_block'][1], '#'). // %}
 112                  ')
 113              }sx',
 114  
 115              'operator' => $this->getOperatorRegex(),
 116  
 117              // #}
 118              'lex_comment' => '{
 119                  (?:'.
 120                      preg_quote($this->options['whitespace_trim']).preg_quote($this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
 121                      '|'.
 122                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
 123                      '|'.
 124                      preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
 125                  ')
 126              }sx',
 127  
 128              // verbatim %}
 129              'lex_block_raw' => '{
 130                  \s*
 131                  (raw|verbatim)
 132                  \s*
 133                  (?:'.
 134                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
 135                      '|'.
 136                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
 137                      '|'.
 138                      preg_quote($this->options['tag_block'][1], '#'). // %}
 139                  ')
 140              }Asx',
 141  
 142              'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
 143  
 144              // {{ or {% or {#
 145              'lex_tokens_start' => '{
 146                  ('.
 147                      preg_quote($this->options['tag_variable'][0], '#'). // {{
 148                      '|'.
 149                      preg_quote($this->options['tag_block'][0], '#'). // {%
 150                      '|'.
 151                      preg_quote($this->options['tag_comment'][0], '#'). // {#
 152                  ')('.
 153                      preg_quote($this->options['whitespace_trim'], '#'). // -
 154                      '|'.
 155                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
 156                  ')?
 157              }sx',
 158              'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
 159              'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
 160          ];
 161      }
 162  
 163      public function tokenize($code, $name = null)
 164      {
 165          if (!$code instanceof Source) {
 166              @trigger_error(sprintf('Passing a string as the $code argument of %s() is deprecated since version 1.27 and will be removed in 2.0. Pass a \Twig\Source instance instead.', __METHOD__), E_USER_DEPRECATED);
 167              $this->source = new Source($code, $name);
 168          } else {
 169              $this->source = $code;
 170          }
 171  
 172          if (((int) ini_get('mbstring.func_overload')) & 2) {
 173              @trigger_error('Support for having "mbstring.func_overload" different from 0 is deprecated version 1.29 and will be removed in 2.0.', E_USER_DEPRECATED);
 174          }
 175  
 176          if (\function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
 177              $mbEncoding = mb_internal_encoding();
 178              mb_internal_encoding('ASCII');
 179          } else {
 180              $mbEncoding = null;
 181          }
 182  
 183          $this->code = str_replace(["\r\n", "\r"], "\n", $this->source->getCode());
 184          $this->filename = $this->source->getName();
 185          $this->cursor = 0;
 186          $this->lineno = 1;
 187          $this->end = \strlen($this->code);
 188          $this->tokens = [];
 189          $this->state = self::STATE_DATA;
 190          $this->states = [];
 191          $this->brackets = [];
 192          $this->position = -1;
 193  
 194          // find all token starts in one go
 195          preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
 196          $this->positions = $matches;
 197  
 198          while ($this->cursor < $this->end) {
 199              // dispatch to the lexing functions depending
 200              // on the current state
 201              switch ($this->state) {
 202                  case self::STATE_DATA:
 203                      $this->lexData();
 204                      break;
 205  
 206                  case self::STATE_BLOCK:
 207                      $this->lexBlock();
 208                      break;
 209  
 210                  case self::STATE_VAR:
 211                      $this->lexVar();
 212                      break;
 213  
 214                  case self::STATE_STRING:
 215                      $this->lexString();
 216                      break;
 217  
 218                  case self::STATE_INTERPOLATION:
 219                      $this->lexInterpolation();
 220                      break;
 221              }
 222          }
 223  
 224          $this->pushToken(Token::EOF_TYPE);
 225  
 226          if (!empty($this->brackets)) {
 227              list($expect, $lineno) = array_pop($this->brackets);
 228              throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 229          }
 230  
 231          if ($mbEncoding) {
 232              mb_internal_encoding($mbEncoding);
 233          }
 234  
 235          return new TokenStream($this->tokens, $this->source);
 236      }
 237  
 238      protected function lexData()
 239      {
 240          // if no matches are left we return the rest of the template as simple text token
 241          if ($this->position == \count($this->positions[0]) - 1) {
 242              $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
 243              $this->cursor = $this->end;
 244  
 245              return;
 246          }
 247  
 248          // Find the first token after the current cursor
 249          $position = $this->positions[0][++$this->position];
 250          while ($position[1] < $this->cursor) {
 251              if ($this->position == \count($this->positions[0]) - 1) {
 252                  return;
 253              }
 254              $position = $this->positions[0][++$this->position];
 255          }
 256  
 257          // push the template text first
 258          $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
 259  
 260          // trim?
 261          if (isset($this->positions[2][$this->position][0])) {
 262              if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
 263                  // whitespace_trim detected ({%-, {{- or {#-)
 264                  $text = rtrim($text);
 265              } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
 266                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 267                  // don't trim \r and \n
 268                  $text = rtrim($text, " \t\0\x0B");
 269              }
 270          }
 271          $this->pushToken(Token::TEXT_TYPE, $text);
 272          $this->moveCursor($textContent.$position[0]);
 273  
 274          switch ($this->positions[1][$this->position][0]) {
 275              case $this->options['tag_comment'][0]:
 276                  $this->lexComment();
 277                  break;
 278  
 279              case $this->options['tag_block'][0]:
 280                  // raw data?
 281                  if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
 282                      $this->moveCursor($match[0]);
 283                      $this->lexRawData($match[1]);
 284                  // {% line \d+ %}
 285                  } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
 286                      $this->moveCursor($match[0]);
 287                      $this->lineno = (int) $match[1];
 288                  } else {
 289                      $this->pushToken(Token::BLOCK_START_TYPE);
 290                      $this->pushState(self::STATE_BLOCK);
 291                      $this->currentVarBlockLine = $this->lineno;
 292                  }
 293                  break;
 294  
 295              case $this->options['tag_variable'][0]:
 296                  $this->pushToken(Token::VAR_START_TYPE);
 297                  $this->pushState(self::STATE_VAR);
 298                  $this->currentVarBlockLine = $this->lineno;
 299                  break;
 300          }
 301      }
 302  
 303      protected function lexBlock()
 304      {
 305          if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
 306              $this->pushToken(Token::BLOCK_END_TYPE);
 307              $this->moveCursor($match[0]);
 308              $this->popState();
 309          } else {
 310              $this->lexExpression();
 311          }
 312      }
 313  
 314      protected function lexVar()
 315      {
 316          if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
 317              $this->pushToken(Token::VAR_END_TYPE);
 318              $this->moveCursor($match[0]);
 319              $this->popState();
 320          } else {
 321              $this->lexExpression();
 322          }
 323      }
 324  
 325      protected function lexExpression()
 326      {
 327          // whitespace
 328          if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
 329              $this->moveCursor($match[0]);
 330  
 331              if ($this->cursor >= $this->end) {
 332                  throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
 333              }
 334          }
 335  
 336          // arrow function
 337          if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor + 1]) {
 338              $this->pushToken(Token::ARROW_TYPE, '=>');
 339              $this->moveCursor('=>');
 340          }
 341          // operators
 342          elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
 343              $this->pushToken(Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
 344              $this->moveCursor($match[0]);
 345          }
 346          // names
 347          elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
 348              $this->pushToken(Token::NAME_TYPE, $match[0]);
 349              $this->moveCursor($match[0]);
 350          }
 351          // numbers
 352          elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
 353              $number = (float) $match[0];  // floats
 354              if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
 355                  $number = (int) $match[0]; // integers lower than the maximum
 356              }
 357              $this->pushToken(Token::NUMBER_TYPE, $number);
 358              $this->moveCursor($match[0]);
 359          }
 360          // punctuation
 361          elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
 362              // opening bracket
 363              if (false !== strpos('([{', $this->code[$this->cursor])) {
 364                  $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
 365              }
 366              // closing bracket
 367              elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
 368                  if (empty($this->brackets)) {
 369                      throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 370                  }
 371  
 372                  list($expect, $lineno) = array_pop($this->brackets);
 373                  if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
 374                      throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 375                  }
 376              }
 377  
 378              $this->pushToken(Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
 379              ++$this->cursor;
 380          }
 381          // strings
 382          elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
 383              $this->pushToken(Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
 384              $this->moveCursor($match[0]);
 385          }
 386          // opening double quoted string
 387          elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 388              $this->brackets[] = ['"', $this->lineno];
 389              $this->pushState(self::STATE_STRING);
 390              $this->moveCursor($match[0]);
 391          }
 392          // unlexable
 393          else {
 394              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 395          }
 396      }
 397  
 398      protected function lexRawData($tag)
 399      {
 400          if ('raw' === $tag) {
 401              @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
 402          }
 403  
 404          if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
 405              throw new SyntaxError(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->source);
 406          }
 407  
 408          $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
 409          $this->moveCursor($text.$match[0][0]);
 410  
 411          // trim?
 412          if (isset($match[1][0])) {
 413              if ($this->options['whitespace_trim'] === $match[1][0]) {
 414                  // whitespace_trim detected ({%-, {{- or {#-)
 415                  $text = rtrim($text);
 416              } else {
 417                  // whitespace_line_trim detected ({%~, {{~ or {#~)
 418                  // don't trim \r and \n
 419                  $text = rtrim($text, " \t\0\x0B");
 420              }
 421          }
 422  
 423          $this->pushToken(Token::TEXT_TYPE, $text);
 424      }
 425  
 426      protected function lexComment()
 427      {
 428          if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
 429              throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
 430          }
 431  
 432          $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
 433      }
 434  
 435      protected function lexString()
 436      {
 437          if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
 438              $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
 439              $this->pushToken(Token::INTERPOLATION_START_TYPE);
 440              $this->moveCursor($match[0]);
 441              $this->pushState(self::STATE_INTERPOLATION);
 442          } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
 443              $this->pushToken(Token::STRING_TYPE, stripcslashes($match[0]));
 444              $this->moveCursor($match[0]);
 445          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
 446              list($expect, $lineno) = array_pop($this->brackets);
 447              if ('"' != $this->code[$this->cursor]) {
 448                  throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
 449              }
 450  
 451              $this->popState();
 452              ++$this->cursor;
 453          } else {
 454              // unlexable
 455              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
 456          }
 457      }
 458  
 459      protected function lexInterpolation()
 460      {
 461          $bracket = end($this->brackets);
 462          if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
 463              array_pop($this->brackets);
 464              $this->pushToken(Token::INTERPOLATION_END_TYPE);
 465              $this->moveCursor($match[0]);
 466              $this->popState();
 467          } else {
 468              $this->lexExpression();
 469          }
 470      }
 471  
 472      protected function pushToken($type, $value = '')
 473      {
 474          // do not push empty text tokens
 475          if (Token::TEXT_TYPE === $type && '' === $value) {
 476              return;
 477          }
 478  
 479          $this->tokens[] = new Token($type, $value, $this->lineno);
 480      }
 481  
 482      protected function moveCursor($text)
 483      {
 484          $this->cursor += \strlen($text);
 485          $this->lineno += substr_count($text, "\n");
 486      }
 487  
 488      protected function getOperatorRegex()
 489      {
 490          $operators = array_merge(
 491              ['='],
 492              array_keys($this->env->getUnaryOperators()),
 493              array_keys($this->env->getBinaryOperators())
 494          );
 495  
 496          $operators = array_combine($operators, array_map('strlen', $operators));
 497          arsort($operators);
 498  
 499          $regex = [];
 500          foreach ($operators as $operator => $length) {
 501              // an operator that ends with a character must be followed by
 502              // a whitespace or a parenthesis
 503              if (ctype_alpha($operator[$length - 1])) {
 504                  $r = preg_quote($operator, '/').'(?=[\s()])';
 505              } else {
 506                  $r = preg_quote($operator, '/');
 507              }
 508  
 509              // an operator with a space can be any amount of whitespaces
 510              $r = preg_replace('/\s+/', '\s+', $r);
 511  
 512              $regex[] = $r;
 513          }
 514  
 515          return '/'.implode('|', $regex).'/A';
 516      }
 517  
 518      protected function pushState($state)
 519      {
 520          $this->states[] = $this->state;
 521          $this->state = $state;
 522      }
 523  
 524      protected function popState()
 525      {
 526          if (0 === \count($this->states)) {
 527              throw new \LogicException('Cannot pop state without a previous state.');
 528          }
 529  
 530          $this->state = array_pop($this->states);
 531      }
 532  }
 533  
 534  class_alias('Twig\Lexer', 'Twig_Lexer');


Generated: Tue Apr 7 19:42:26 2020 Cross-referenced by PHPXref 0.7.1