[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
<?php

/*
 * This file is part of Twig.
 *
 * (c) Fabien Potencier
 * (c) Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Twig;

use Twig\Error\SyntaxError;

/**
 * Lexes a template string.
 *
 * The lexer is a small state machine: tokenize() repeatedly dispatches to one
 * of the lex*() methods depending on the current state (plain data, block,
 * variable, double-quoted string, string interpolation) until the cursor has
 * reached the end of the template code.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Lexer
{
    /** @var bool whether the option-dependent regexes have been built yet */
    private $isInitialized = false;

    /** @var Token[] tokens produced so far by the current tokenize() call */
    private $tokens;
    /** @var string template code with line endings normalized to "\n" */
    private $code;
    /** @var int byte offset of the next character to lex in $code */
    private $cursor;
    /** @var int line number attached to the tokens being pushed */
    private $lineno;
    /** @var int byte length of $code */
    private $end;
    /** @var int current state, one of the STATE_* constants */
    private $state;
    /** @var int[] stack of the states to return to on popState() */
    private $states;
    /** @var array[] stack of [opening delimiter, line number] pairs still waiting to be closed */
    private $brackets;
    private $env;
    private $source;
    private $options;
    /** @var string[] regexes built by initialize(), keyed by purpose */
    private $regexes;
    /** @var int index into $positions of the last tag start handled by lexData() (-1 before the first) */
    private $position;
    /** @var array preg_match_all() result (with offsets) of every tag start found in $code */
    private $positions;
    /** @var int line on which the block/variable currently being lexed was opened */
    private $currentVarBlockLine;

    public const STATE_DATA = 0;
    public const STATE_BLOCK = 1;
    public const STATE_VAR = 2;
    public const STATE_STRING = 3;
    public const STATE_INTERPOLATION = 4;

    public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    public const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
    public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    public const REGEX_DQ_STRING_DELIM = '/"/A';
    public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
    public const PUNCTUATION = '()[]{}?:.,|';

    /**
     * @param Environment $env     environment queried for the unary and binary operators
     * @param array       $options overrides for the default delimiters and whitespace
     *                             control characters (merged over the defaults below)
     */
    public function __construct(Environment $env, array $options = [])
    {
        $this->env = $env;

        $this->options = array_merge([
            'tag_comment' => ['{#', '#}'],
            'tag_block' => ['{%', '%}'],
            'tag_variable' => ['{{', '}}'],
            'whitespace_trim' => '-',
            'whitespace_line_trim' => '~',
            'whitespace_line_chars' => ' \t\0\x0B',
            'interpolation' => ['#{', '}'],
        ], $options);
    }

    /**
     * Builds the option-dependent regexes once; subsequent calls are no-ops.
     *
     * Called from tokenize() rather than from the constructor, so the operator
     * regex is built from the operators the environment reports at the time of
     * the first tokenize() call.
     */
    private function initialize()
    {
        if ($this->isInitialized) {
            return;
        }

        $this->isInitialized = true;

        // when PHP 7.3 is the min version, we will be able to remove the '#' part
        // in preg_quote as it's part of the default
        $this->regexes = [
            // }}
            'lex_var' => '{
                \s*
                (?:'.
                    preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
                    '|'.
                    preg_quote($this->options['tag_variable'][1], '#'). // }}
                ')
            }Ax',

            // %}
            'lex_block' => '{
                \s*
                (?:'.
                    preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                    '|'.
                    preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
                ')
            }Ax',

            // {% endverbatim %}
            'lex_raw_data' => '{'.
                preg_quote($this->options['tag_block'][0], '#'). // {%
                '('.
                    // The trim modifiers are quoted like in "lex_tokens_start": lexRawData()
                    // compares the captured modifier literally against the option, so the
                    // option must be matched as a literal (and "#" would otherwise start a
                    // comment under the "x" flag).
                    preg_quote($this->options['whitespace_trim'], '#'). // -
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'], '#'). // ~
                ')?\s*endverbatim\s*'.
                '(?:'.
                    preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                    '|'.
                    preg_quote($this->options['tag_block'][1], '#'). // %}
                ')
            }sx',

            'operator' => $this->getOperatorRegex(),

            // #}
            'lex_comment' => '{
                (?:'.
                    preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
                    '|'.
                    preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
                ')
            }sx',

            // verbatim %}
            'lex_block_raw' => '{
                \s*verbatim\s*
                (?:'.
                    preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                    '|'.
                    preg_quote($this->options['tag_block'][1], '#'). // %}
                ')
            }Asx',

            'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',

            // {{ or {% or {#
            'lex_tokens_start' => '{
                ('.
                    preg_quote($this->options['tag_variable'][0], '#'). // {{
                    '|'.
                    preg_quote($this->options['tag_block'][0], '#'). // {%
                    '|'.
                    preg_quote($this->options['tag_comment'][0], '#'). // {#
                ')('.
                    preg_quote($this->options['whitespace_trim'], '#'). // -
                    '|'.
                    preg_quote($this->options['whitespace_line_trim'], '#'). // ~
                ')?
            }sx',
            'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
            'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
        ];
    }

    /**
     * Tokenizes a template source.
     *
     * Resets all lexing state, so the same instance can be reused for several
     * sources.
     *
     * @param Source $source the template to lex
     *
     * @return TokenStream the tokens, terminated by an EOF token
     *
     * @throws SyntaxError when the template cannot be lexed or a bracket,
     *                     string or interpolation is left unclosed
     */
    public function tokenize(Source $source)
    {
        $this->initialize();

        $this->source = $source;
        // normalize line endings so that line counting only has to look for "\n"
        $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
        $this->cursor = 0;
        $this->lineno = 1;
        $this->end = \strlen($this->code);
        $this->tokens = [];
        $this->state = self::STATE_DATA;
        $this->states = [];
        $this->brackets = [];
        $this->position = -1;

        // find all token starts in one go
        preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, \PREG_OFFSET_CAPTURE);
        $this->positions = $matches;

        while ($this->cursor < $this->end) {
            // dispatch to the lexing functions depending
            // on the current state
            switch ($this->state) {
                case self::STATE_DATA:
                    $this->lexData();
                    break;

                case self::STATE_BLOCK:
                    $this->lexBlock();
                    break;

                case self::STATE_VAR:
                    $this->lexVar();
                    break;

                case self::STATE_STRING:
                    $this->lexString();
                    break;

                case self::STATE_INTERPOLATION:
                    $this->lexInterpolation();
                    break;
            }
        }

        $this->pushToken(/* Token::EOF_TYPE */ -1);

        if (!empty($this->brackets)) {
            [$expect, $lineno] = array_pop($this->brackets);
            throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
        }

        return new TokenStream($this->tokens, $this->source);
    }

    /**
     * Lexes plain template text up to the next tag start, then enters the tag.
     *
     * Comments, "verbatim" blocks and "line" blocks are consumed here directly;
     * other blocks and variables push a start token and switch state.
     */
    private function lexData()
    {
        // if no matches are left we return the rest of the template as simple text token
        if ($this->position === \count($this->positions[0]) - 1) {
            $this->pushToken(/* Token::TEXT_TYPE */ 0, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // Find the first token after the current cursor
        // (tag starts located before the cursor were already consumed and are skipped)
        $position = $this->positions[0][++$this->position];
        while ($position[1] < $this->cursor) {
            if ($this->position === \count($this->positions[0]) - 1) {
                // nothing consumed: the next lexData() call emits the remaining text
                return;
            }
            $position = $this->positions[0][++$this->position];
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

        // trim?
        if (isset($this->positions[2][$this->position][0])) {
            if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
                // whitespace_trim detected ({%-, {{- or {#-)
                $text = rtrim($text);
            } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
                // whitespace_line_trim detected ({%~, {{~ or {#~)
                // don't trim \r and \n
                $text = rtrim($text, " \t\0\x0B");
            }
        }
        $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
        // advance over the untrimmed text so that cursor and line count stay exact
        $this->moveCursor($textContent.$position[0]);

        switch ($this->positions[1][$this->position][0]) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                // raw data?
                if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData();
                // {% line \d+ %}
                } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                } else {
                    $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
                    $this->pushState(self::STATE_BLOCK);
                    $this->currentVarBlockLine = $this->lineno;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(/* Token::VAR_START_TYPE */ 2);
                $this->pushState(self::STATE_VAR);
                $this->currentVarBlockLine = $this->lineno;
                break;
        }
    }

    /**
     * Lexes inside a block tag: either its end delimiter (only recognized when
     * no bracket is open) or one expression token.
     */
    private function lexBlock()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes inside a variable tag: either its end delimiter (only recognized
     * when no bracket is open) or one expression token.
     */
    private function lexVar()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(/* Token::VAR_END_TYPE */ 4);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor (after skipping leading
     * whitespace).
     *
     * @throws SyntaxError on end of template, on an unbalanced closing bracket
     *                     or on a character that starts no known token
     */
    private function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
            }
        }

        // arrow function
        // (the bounds check avoids reading past the end of the code when "=" is
        // the very last character of the template)
        if ('=' === $this->code[$this->cursor] && $this->cursor + 1 < $this->end && '>' === $this->code[$this->cursor + 1]) {
            $this->pushToken(Token::ARROW_TYPE, '=>');
            $this->moveCursor('=>');
        }
        // operators
        elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
            // collapse whitespace inside multi-word operators to a single space
            $this->pushToken(/* Token::OPERATOR_TYPE */ 8, preg_replace('/\s+/', ' ', $match[0]));
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(/* Token::NAME_TYPE */ 5, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            $number = (float) $match[0];  // floats
            if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
                $number = (int) $match[0]; // integers lower than the maximum
            }
            $this->pushToken(/* Token::NUMBER_TYPE */ 6, $number);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            // opening bracket
            if (false !== strpos('([{', $this->code[$this->cursor])) {
                $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
            }
            // closing bracket
            elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
                if (empty($this->brackets)) {
                    throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
                }

                [$expect, $lineno] = array_pop($this->brackets);
                if ($this->code[$this->cursor] !== strtr($expect, '([{', ')]}')) {
                    throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
                }
            }

            $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9, $this->code[$this->cursor]);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // opening double quoted string
        elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = ['"', $this->lineno];
            $this->pushState(self::STATE_STRING);
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Consumes the body of a "verbatim" block up to and including its
     * "endverbatim" tag and pushes the body as one text token.
     *
     * @throws SyntaxError when no "endverbatim" tag follows
     */
    private function lexRawData()
    {
        if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
        }

        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->moveCursor($text.$match[0][0]);

        // trim?
        if (isset($match[1][0])) {
            if ($this->options['whitespace_trim'] === $match[1][0]) {
                // whitespace_trim detected ({%-, {{- or {#-)
                $text = rtrim($text);
            } else {
                // whitespace_line_trim detected ({%~, {{~ or {#~)
                // don't trim \r and \n
                $text = rtrim($text, " \t\0\x0B");
            }
        }

        $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
    }

    /**
     * Skips a comment: moves the cursor past the next comment end delimiter
     * without pushing any token.
     *
     * @throws SyntaxError when the comment is never closed
     */
    private function lexComment()
    {
        if (!preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
        }

        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Lexes inside a double-quoted string: an interpolation start, a literal
     * part, or the closing quote.
     *
     * @throws SyntaxError when none of the above matches at the cursor
     */
    private function lexString()
    {
        if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
            $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
            $this->moveCursor($match[0]);
            $this->pushState(self::STATE_INTERPOLATION);
        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
            $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            [$expect, $lineno] = array_pop($this->brackets);
            if ('"' !== $this->code[$this->cursor]) {
                throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
            }

            $this->popState();
            ++$this->cursor;
        } else {
            // unlexable
            throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Lexes inside a string interpolation: either its end delimiter (only when
     * the innermost open bracket is the interpolation start) or one expression
     * token.
     */
    private function lexInterpolation()
    {
        $bracket = end($this->brackets);
        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
            array_pop($this->brackets);
            $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Appends a token carrying the current line number.
     *
     * @param int   $type  token type
     * @param mixed $value token value
     */
    private function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (/* Token::TEXT_TYPE */ 0 === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor past $text and the line number by the newlines it
     * contains.
     *
     * @param string $text the text that has just been consumed
     */
    private function moveCursor($text)
    {
        $this->cursor += \strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds the anchored regex matching "=" and every unary/binary operator
     * reported by the environment, longest operators first.
     *
     * @return string
     */
    private function getOperatorRegex()
    {
        $operators = array_merge(
            ['='],
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // sort by descending length so that longer operators win the alternation
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = [];
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace, a parenthesis, an opening map [ or sequence {
            $r = preg_quote($operator, '/');
            if (ctype_alpha($operator[$length - 1])) {
                $r .= '(?=[\s()\[{])';
            }

            // an operator that begins with a character must not have a dot or pipe before
            if (ctype_alpha($operator[0])) {
                $r = '(?<![\.\|])'.$r;
            }

            // an operator with a space can be any amount of whitespaces
            $r = preg_replace('/\s+/', '\s+', $r);

            $regex[] = $r;
        }

        return '/'.implode('|', $regex).'/A';
    }

    /**
     * Enters $state, remembering the current state for popState().
     *
     * @param int $state one of the STATE_* constants
     */
    private function pushState($state)
    {
        $this->states[] = $this->state;
        $this->state = $state;
    }

    /**
     * Returns to the state that was active before the last pushState().
     *
     * @throws \LogicException when the state stack is empty
     */
    private function popState()
    {
        if (0 === \count($this->states)) {
            throw new \LogicException('Cannot pop state without a previous state.');
        }

        $this->state = array_pop($this->states);
    }
}

class_alias('Twig\Lexer', 'Twig_Lexer');
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |