[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
<?php

/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter;

use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Parser\FilterProcessing;
use s9e\TextFormatter\Parser\Logger;
use s9e\TextFormatter\Parser\Tag;

/**
* Text parser. Runs the configured plugin parsers over a text, processes the tags they create and
* returns an XML representation of the text (see parse())
*/
class Parser
{
    /**#@+
    * Boolean rules bitfield
    */
    const RULE_AUTO_CLOSE        = 1 << 0;
    const RULE_AUTO_REOPEN       = 1 << 1;
    const RULE_BREAK_PARAGRAPH   = 1 << 2;
    const RULE_CREATE_PARAGRAPHS = 1 << 3;
    const RULE_DISABLE_AUTO_BR   = 1 << 4;
    const RULE_ENABLE_AUTO_BR    = 1 << 5;
    const RULE_IGNORE_TAGS       = 1 << 6;
    const RULE_IGNORE_TEXT       = 1 << 7;
    const RULE_IGNORE_WHITESPACE = 1 << 8;
    const RULE_IS_TRANSPARENT    = 1 << 9;
    const RULE_PREVENT_BR        = 1 << 10;
    const RULE_SUSPEND_AUTO_BR   = 1 << 11;
    const RULE_TRIM_FIRST_LINE   = 1 << 12;
    /**#@-*/

    /**
    * Bitwise disjunction of rules related to automatic line breaks
    */
    const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;

    /**
    * Bitwise disjunction of rules that are inherited by subcontexts
    */
    const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;

    /**
    * All the characters that are considered whitespace
    */
    const WHITESPACE = " \n\t";

    /**
    * @var array Number of open tags for each tag name
    */
    protected $cntOpen;

    /**
    * @var array Number of times each tag has been used
    */
    protected $cntTotal;

    /**
    * @var array Current context
    */
    protected $context;

    /**
    * @var integer How hard the parser has worked on fixing bad markup so far
    */
    protected $currentFixingCost;

    /**
    * @var Tag Current tag being processed
    */
    protected $currentTag;

    /**
    * @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
    */
    protected $isRich;

    /**
    * @var Logger This parser's logger
    */
    protected $logger;

    /**
    * @var integer How hard the parser should work on fixing bad markup
    */
    public $maxFixingCost = 10000;

    /**
    * @var array Associative array of namespace prefixes in use in document (prefixes used as key)
    */
    protected $namespaces;

    /**
    * @var array Stack of open tags (instances of Tag)
    */
    protected $openTags;

    /**
    * @var string This parser's output
    */
    protected $output;

    /**
    * @var integer Position of the cursor in the original text
    */
    protected $pos;

    /**
    * @var array Array of callbacks, using plugin names as keys
    */
    protected $pluginParsers = [];

    /**
    * @var array Associative array of [pluginName => pluginConfig]
    */
    protected $pluginsConfig;

    /**
    * @var array Variables registered for use in filters
    */
    public $registeredVars = [];

    /**
    * @var array Root context, used at the root of the document
    */
    protected $rootContext;

    /**
    * @var array Tags' config
    */
    protected $tagsConfig;

    /**
    * @var array Tag storage
    */
    protected $tagStack;

    /**
    * @var bool Whether the tags in the stack are sorted
    */
    protected $tagStackIsSorted;

    /**
    * @var string Text being parsed
    */
    protected $text;

    /**
    * @var integer Length of the text being parsed
    */
    protected $textLen;

    /**
    * @var integer Counter incremented every time the parser is reset. Used as a canary to detect
    *              whether the parser was reset during execution
    */
    protected $uid = 0;

    /**
    * @var integer Position before which we output text verbatim, without paragraphs or linebreaks
    */
    protected $wsPos;

    /**
    * Constructor
    *
    * @param array $config Parser config. Must contain the keys "plugins", "registeredVars",
    *                      "rootContext" and "tags"
    */
    public function __construct(array $config)
    {
        $this->pluginsConfig  = $config['plugins'];
        $this->registeredVars = $config['registeredVars'];
        $this->rootContext    = $config['rootContext'];
        $this->tagsConfig     = $config['tags'];

        // Creates the logger, same as after unserialization
        $this->__wakeup();
    }

    /**
    * Serializer
    *
    * Returns the properties that need to persist through serialization.
    *
    * NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
    *       of the serializer to the user (e.g. igbinary)
    *
    * @return array
    */
    public function __sleep()
    {
        return ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];
    }

    /**
    * Unserializer
    *
    * @return void
    */
    public function __wakeup()
    {
        $this->logger = new Logger;
    }

    /**
    * Reset the parser for a new parsing
    *
    * @param  string $text Text to be parsed
    * @return void
    *
    * @throws InvalidArgumentException if the text is not valid UTF-8
    */
    protected function reset($text)
    {
        // Reject invalid UTF-8
        if (!preg_match('//u', $text))
        {
            throw new InvalidArgumentException('Invalid UTF-8 input');
        }

        // Normalize CR/CRLF to LF, remove control characters that aren't allowed in XML
        $text = preg_replace('/\\r\\n?/', "\n", $text);
        $text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $text);

        // Clear the logs
        $this->logger->clear();

        // Initialize the rest
        $this->cntOpen           = [];
        $this->cntTotal          = [];
        $this->currentFixingCost = 0;
        $this->currentTag        = null;
        $this->isRich            = false;
        $this->namespaces        = [];
        $this->openTags          = [];
        $this->output            = '';
        $this->pos               = 0;
        $this->tagStack          = [];
        $this->tagStackIsSorted  = false;
        $this->text              = $text;
        $this->textLen           = strlen($text);
        $this->wsPos             = 0;

        // Initialize the root context
        $this->context = $this->rootContext;
        $this->context['inParagraph'] = false;

        // Bump the UID
        ++$this->uid;
    }

    /**
    * Set a tag's option
    *
    * This method ensures that the tag's config is a value and not a reference, to prevent
    * potential side-effects. References contained *inside* the tag's config are left untouched
    *
    * @param  string $tagName     Tag's name
    * @param  string $optionName  Option's name
    * @param  mixed  $optionValue Option's value
    * @return void
    */
    protected function setTagOption($tagName, $optionName, $optionValue)
    {
        if (isset($this->tagsConfig[$tagName]))
        {
            // Copy the tag's config and remove it. That will destroy the reference
            $tagConfig = $this->tagsConfig[$tagName];
            unset($this->tagsConfig[$tagName]);

            // Set the new value and replace the tag's config
            $tagConfig[$optionName]     = $optionValue;
            $this->tagsConfig[$tagName] = $tagConfig;
        }
    }

    //==========================================================================
    // Public API
    //==========================================================================

    /**
    * Disable a tag
    *
    * @param  string $tagName Name of the tag
    * @return void
    */
    public function disableTag($tagName)
    {
        $this->setTagOption($tagName, 'isDisabled', true);
    }

    /**
    * Enable a tag
    *
    * Like setTagOption(), this method replaces the tag's config with a copy before modifying it
    * so that a config stored as a reference is not modified as a side-effect
    *
    * @param  string $tagName Name of the tag
    * @return void
    */
    public function enableTag($tagName)
    {
        if (isset($this->tagsConfig[$tagName]))
        {
            // Copy the tag's config and remove it. That will destroy the reference
            $tagConfig = $this->tagsConfig[$tagName];
            unset($this->tagsConfig[$tagName]);

            // Remove the flag from the copy and replace the tag's config
            unset($tagConfig['isDisabled']);
            $this->tagsConfig[$tagName] = $tagConfig;
        }
    }

    /**
    * Get this parser's Logger instance
    *
    * @return Logger
    */
    public function getLogger()
    {
        return $this->logger;
    }

    /**
    * Return the last text parsed
    *
    * This method returns the normalized text, which may be slightly different from the original
    * text in that EOLs are normalized to LF and other control codes are stripped. This method is
    * meant to be used in support of processing log entries, which contain offsets based on the
    * normalized text
    *
    * @see Parser::reset()
    *
    * @return string
    */
    public function getText()
    {
        return $this->text;
    }

    /**
    * Parse a text
    *
    * @param  string $text Text to parse
    * @return string       XML representation
    *
    * @throws RuntimeException if the parser was reset while it was running
    */
    public function parse($text)
    {
        // Reset the parser and save the uid
        $this->reset($text);
        $uid = $this->uid;

        // Do the heavy lifting
        $this->executePluginParsers();
        $this->processTags();

        // Finalize the document
        $this->finalizeOutput();

        // Check the uid in case a plugin or a filter reset the parser mid-execution
        if ($this->uid !== $uid)
        {
            throw new RuntimeException('The parser has been reset during execution');
        }

        // Log a warning if the fixing cost limit was exceeded
        if ($this->currentFixingCost > $this->maxFixingCost)
        {
            $this->logger->warn('Fixing cost limit exceeded');
        }

        return $this->output;
    }

    /**
    * Change a tag's tagLimit
    *
    * NOTE: the default tagLimit should generally be set during configuration instead
    *
    * @param  string  $tagName  The tag's name, in UPPERCASE
    * @param  integer $tagLimit
    * @return void
    */
    public function setTagLimit($tagName, $tagLimit)
    {
        $this->setTagOption($tagName, 'tagLimit', $tagLimit);
    }

    /**
    * Change a tag's nestingLimit
    *
    * NOTE: the default nestingLimit should generally be set during configuration instead
    *
    * @param  string  $tagName      The tag's name, in UPPERCASE
    * @param  integer $nestingLimit
    * @return void
    */
    public function setNestingLimit($tagName, $nestingLimit)
    {
        $this->setTagOption($tagName, 'nestingLimit', $nestingLimit);
    }

    //==========================================================================
    // Output handling
    //==========================================================================

    /**
    * Finalize the output by appending the rest of the unprocessed text and create the root node
    *
    * @return void
    */
    protected function finalizeOutput()
    {
        // Output the rest of the text and close the last paragraph
        $this->outputText($this->textLen, 0, true);

        // Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs
        do
        {
            $this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $cnt);
        }
        while ($cnt > 0);

        // Merge consecutive <i> tags
        if (strpos($this->output, '</i><i>') !== false)
        {
            $this->output = str_replace('</i><i>', '', $this->output);
        }

        // Remove control characters from the output to ensure it's valid XML
        $this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);

        // Encode Unicode characters that are outside of the BMP
        $this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);

        // Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
        $tagName = ($this->isRich) ? 'r' : 't';

        // Prepare the root node with all the namespace declarations
        $tmp = '<' . $tagName;
        foreach (array_keys($this->namespaces) as $prefix)
        {
            $tmp .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
        }

        $this->output = $tmp . '>' . $this->output . '</' . $tagName . '>';
    }

    /**
    * Append a tag to the output
    *
    * @param  Tag  $tag Tag to append
    * @return void
    */
    protected function outputTag(Tag $tag)
    {
        $this->isRich = true;

        $tagName  = $tag->getName();
        $tagPos   = $tag->getPos();
        $tagLen   = $tag->getLen();
        $tagFlags = $tag->getFlags();

        if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
        {
            $skipBefore = 1;
            $skipAfter  = ($tag->isEndTag()) ? 2 : 1;
        }
        else
        {
            $skipBefore = $skipAfter = 0;
        }

        // Current paragraph must end before the tag if:
        //  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
        //  - the tag is an end tag (but not self-closing)
        $closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));

        // Let the cursor catch up with this tag's position
        $this->outputText($tagPos, $skipBefore, $closeParagraph);

        // Capture the text consumed by the tag
        $tagText = ($tagLen)
                 ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
                 : '';

        // Output current tag
        if ($tag->isStartTag())
        {
            // Handle paragraphs before opening the tag
            if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
            {
                $this->outputParagraphStart($tagPos);
            }

            // Record this tag's namespace, if applicable
            $colonPos = strpos($tagName, ':');
            if ($colonPos)
            {
                $this->namespaces[substr($tagName, 0, $colonPos)] = 0;
            }

            // Open the start tag and add its attributes, but don't close the tag
            $this->output .= '<' . $tagName;

            // We output the attributes in lexical order. Helps canonicalizing the output and could
            // prove useful someday
            $attributes = $tag->getAttributes();
            ksort($attributes);

            foreach ($attributes as $attrName => $attrValue)
            {
                $this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
            }

            if ($tag->isSelfClosingTag())
            {
                if ($tagLen)
                {
                    $this->output .= '>' . $tagText . '</' . $tagName . '>';
                }
                else
                {
                    $this->output .= '/>';
                }
            }
            elseif ($tagLen)
            {
                $this->output .= '><s>' . $tagText . '</s>';
            }
            else
            {
                $this->output .= '>';
            }
        }
        else
        {
            if ($tagLen)
            {
                $this->output .= '<e>' . $tagText . '</e>';
            }

            $this->output .= '</' . $tagName . '>';
        }

        // Move the cursor past the tag
        $this->pos = $tagPos + $tagLen;

        // Skip newlines (no other whitespace) after this tag
        $this->wsPos = $this->pos;
        while ($skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
        {
            // Decrement the number of lines to skip
            --$skipAfter;

            // Move the cursor past the newline
            ++$this->wsPos;
        }
    }

    /**
    * Output the text between the cursor's position (included) and given position (not included)
    *
    * @param  integer $catchupPos     Position we're catching up to
    * @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
    * @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
    * @return void
    */
    protected function outputText($catchupPos, $maxLines, $closeParagraph)
    {
        if ($closeParagraph)
        {
            if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
            {
                $closeParagraph = false;
            }
            else
            {
                // Ignore any number of lines at the end if we're closing a paragraph
                $maxLines = -1;
            }
        }

        if ($this->pos >= $catchupPos)
        {
            // We're already there, close the paragraph if applicable and return
            if ($closeParagraph)
            {
                $this->outputParagraphEnd();
            }

            return;
        }

        // Skip over previously identified whitespace if applicable
        if ($this->wsPos > $this->pos)
        {
            $skipPos       = min($catchupPos, $this->wsPos);
            $this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
            $this->pos     = $skipPos;

            if ($this->pos >= $catchupPos)
            {
                // Skipped everything. Close the paragraph if applicable and return
                if ($closeParagraph)
                {
                    $this->outputParagraphEnd();
                }

                return;
            }
        }

        // Test whether we're even supposed to output anything
        if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
        {
            $catchupLen  = $catchupPos - $this->pos;
            $catchupText = substr($this->text, $this->pos, $catchupLen);

            // If the catchup text is not entirely composed of whitespace, we put it inside ignore
            // tags
            if (strspn($catchupText, self::WHITESPACE) < $catchupLen)
            {
                $catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
            }

            $this->output .= $catchupText;
            $this->pos     = $catchupPos;

            if ($closeParagraph)
            {
                $this->outputParagraphEnd();
            }

            return;
        }

        // Compute the amount of text to ignore at the end of the output
        $ignorePos = $catchupPos;
        $ignoreLen = 0;

        // Ignore as many lines (including whitespace) as specified. A negative $maxLines never
        // reaches 0, which means any number of lines can be ignored
        while ($maxLines && --$ignorePos >= $this->pos)
        {
            $c = $this->text[$ignorePos];
            if (strpos(self::WHITESPACE, $c) === false)
            {
                break;
            }

            if ($c === "\n")
            {
                --$maxLines;
            }

            ++$ignoreLen;
        }

        // Adjust $catchupPos to ignore the text at the end
        $catchupPos -= $ignoreLen;

        // Break down the text in paragraphs if applicable
        if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
        {
            if (!$this->context['inParagraph'])
            {
                $this->outputWhitespace($catchupPos);

                if ($catchupPos > $this->pos)
                {
                    $this->outputParagraphStart($catchupPos);
                }
            }

            // Look for a paragraph break in this text
            $pbPos = strpos($this->text, "\n\n", $this->pos);

            while ($pbPos !== false && $pbPos < $catchupPos)
            {
                $this->outputText($pbPos, 0, true);
                $this->outputParagraphStart($catchupPos);

                $pbPos = strpos($this->text, "\n\n", $this->pos);
            }
        }

        // Capture, escape and output the text
        if ($catchupPos > $this->pos)
        {
            $catchupText = htmlspecialchars(
                substr($this->text, $this->pos, $catchupPos - $this->pos),
                ENT_NOQUOTES,
                'UTF-8'
            );

            // Format line breaks if applicable
            if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
            {
                $catchupText = str_replace("\n", "<br/>\n", $catchupText);
            }

            $this->output .= $catchupText;
        }

        // Close the paragraph if applicable
        if ($closeParagraph)
        {
            $this->outputParagraphEnd();
        }

        // Add the ignored text if applicable
        if ($ignoreLen)
        {
            $this->output .= substr($this->text, $catchupPos, $ignoreLen);
        }

        // Move the cursor past the text
        $this->pos = $catchupPos + $ignoreLen;
    }

    /**
    * Output a linebreak tag
    *
    * @param  Tag  $tag
    * @return void
    */
    protected function outputBrTag(Tag $tag)
    {
        $this->outputText($tag->getPos(), 0, false);
        $this->output .= '<br/>';
    }

    /**
    * Output an ignore tag
    *
    * @param  Tag  $tag
    * @return void
    */
    protected function outputIgnoreTag(Tag $tag)
    {
        $tagPos = $tag->getPos();
        $tagLen = $tag->getLen();

        // Capture the text to ignore
        $ignoreText = substr($this->text, $tagPos, $tagLen);

        // Catch up with the tag's position then output the tag
        $this->outputText($tagPos, 0, false);
        $this->output .= '<i>' . htmlspecialchars($ignoreText, ENT_NOQUOTES, 'UTF-8') . '</i>';
        $this->isRich = true;

        // Move the cursor past this tag
        $this->pos = $tagPos + $tagLen;
    }

    /**
    * Start a paragraph between current position and given position, if applicable
    *
    * @param  integer $maxPos Rightmost position at which the paragraph can be opened
    * @return void
    */
    protected function outputParagraphStart($maxPos)
    {
        // Do nothing if we're already in a paragraph, or if we don't use paragraphs
        if ($this->context['inParagraph']
         || !($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
        {
            return;
        }

        // Output the whitespace between $this->pos and $maxPos if applicable
        $this->outputWhitespace($maxPos);

        // Open the paragraph, but only if it's not at the very end of the text
        if ($this->pos < $this->textLen)
        {
            $this->output .= '<p>';
            $this->context['inParagraph'] = true;
        }
    }

    /**
    * Close current paragraph at current position if applicable
    *
    * @return void
    */
    protected function outputParagraphEnd()
    {
        // Do nothing if we're not in a paragraph
        if (!$this->context['inParagraph'])
        {
            return;
        }

        $this->output .= '</p>';
        $this->context['inParagraph'] = false;
    }

    /**
    * Output the content of a verbatim tag
    *
    * The text is output using given tag's flags in place of the current context's flags, which
    * are restored afterwards
    *
    * @param  Tag  $tag
    * @return void
    */
    protected function outputVerbatim(Tag $tag)
    {
        $flags = $this->context['flags'];
        $this->context['flags'] = $tag->getFlags();
        $this->outputText($this->currentTag->getPos() + $this->currentTag->getLen(), 0, false);
        $this->context['flags'] = $flags;
    }

    /**
    * Skip as much whitespace after current position as possible
    *
    * @param  integer $maxPos Rightmost character to be skipped
    * @return void
    */
    protected function outputWhitespace($maxPos)
    {
        if ($maxPos > $this->pos)
        {
            $spn = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);

            if ($spn)
            {
                $this->output .= substr($this->text, $this->pos, $spn);
                $this->pos    += $spn;
            }
        }
    }

    //==========================================================================
    // Plugins handling
    //==========================================================================

    /**
    * Disable a plugin
    *
    * @param  string $pluginName Name of the plugin
    * @return void
    */
    public function disablePlugin($pluginName)
    {
        if (isset($this->pluginsConfig[$pluginName]))
        {
            // Copy the plugin's config to remove the reference
            $pluginConfig = $this->pluginsConfig[$pluginName];
            unset($this->pluginsConfig[$pluginName]);

            // Update the value and replace the plugin's config
            $pluginConfig['isDisabled'] = true;
            $this->pluginsConfig[$pluginName] = $pluginConfig;
        }
    }

    /**
    * Enable a plugin
    *
    * @param  string $pluginName Name of the plugin
    * @return void
    */
    public function enablePlugin($pluginName)
    {
        if (isset($this->pluginsConfig[$pluginName]))
        {
            $this->pluginsConfig[$pluginName]['isDisabled'] = false;
        }
    }

    /**
    * Execute given plugin
    *
    * @param  string $pluginName Plugin's name
    * @return void
    */
    protected function executePluginParser($pluginName)
    {
        $pluginConfig = $this->pluginsConfig[$pluginName];
        if (isset($pluginConfig['quickMatch']) && strpos($this->text, $pluginConfig['quickMatch']) === false)
        {
            return;
        }

        $matches = [];
        if (isset($pluginConfig['regexp'], $pluginConfig['regexpLimit']))
        {
            $matches = $this->getMatches($pluginConfig['regexp'], $pluginConfig['regexpLimit']);
            if (empty($matches))
            {
                return;
            }
        }

        // Execute the plugin's parser, which will add tags via $this->addStartTag() and others
        call_user_func($this->getPluginParser($pluginName), $this->text, $matches);
    }

    /**
    * Execute all the plugins
    *
    * @return void
    */
    protected function executePluginParsers()
    {
        foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
        {
            if (empty($pluginConfig['isDisabled']))
            {
                $this->executePluginParser($pluginName);
            }
        }
    }

    /**
    * Execute given regexp and returns as many matches as given limit
    *
    * @param  string  $regexp
    * @param  integer $limit
    * @return array
    */
    protected function getMatches($regexp, $limit)
    {
        $cnt = preg_match_all($regexp, $this->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
        if ($cnt > $limit)
        {
            $matches = array_slice($matches, 0, $limit);
        }

        return $matches;
    }

    /**
    * Get the cached callback for given plugin's parser
    *
    * @param  string   $pluginName Plugin's name
    * @return callable
    */
    protected function getPluginParser($pluginName)
    {
        // Cache a new instance of this plugin's parser if there isn't one already
        if (!isset($this->pluginParsers[$pluginName]))
        {
            $pluginConfig = $this->pluginsConfig[$pluginName];
            $className = (isset($pluginConfig['className']))
                       ? $pluginConfig['className']
                       : 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';

            // Register the parser as a callback
            $this->pluginParsers[$pluginName] = [new $className($this, $pluginConfig), 'parse'];
        }

        return $this->pluginParsers[$pluginName];
    }

    /**
    * Register a parser
    *
    * Can be used to add a new parser with no plugin config, or pre-generate a parser for an
    * existing plugin
    *
    * @param  string   $pluginName
    * @param  callable $parser
    * @param  string   $regexp
    * @param  integer  $limit
    * @return void
    *
    * @throws InvalidArgumentException if $parser is not callable
    */
    public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
    {
        if (!is_callable($parser))
        {
            // $parser is the second argument of this method
            throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a valid callback');
        }
        // Create an empty config for this plugin to ensure it is executed
        if (!isset($this->pluginsConfig[$pluginName]))
        {
            $this->pluginsConfig[$pluginName] = [];
        }
        if (isset($regexp))
        {
            $this->pluginsConfig[$pluginName]['regexp']      = $regexp;
            $this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
        }
        $this->pluginParsers[$pluginName] = $parser;
    }

    //==========================================================================
    // Rules handling
    //==========================================================================

    /**
    * Apply closeAncestor rules associated with given tag
    *
    * @param  Tag  $tag Tag
    * @return bool      Whether a new tag has been added
    */
    protected function closeAncestor(Tag $tag)
    {
        if (!empty($this->openTags))
        {
            $tagName   = $tag->getName();
            $tagConfig = $this->tagsConfig[$tagName];

            if (!empty($tagConfig['rules']['closeAncestor']))
            {
                $i = count($this->openTags);

                while (--$i >= 0)
                {
                    $ancestor     = $this->openTags[$i];
                    $ancestorName = $ancestor->getName();

                    if (isset($tagConfig['rules']['closeAncestor'][$ancestorName]))
                    {
                        ++$this->currentFixingCost;

                        // We have to close this ancestor. First we reinsert this tag...
                        $this->tagStack[] = $tag;

                        // ...then we add a new end tag for it with a better priority
                        $this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);

                        return true;
                    }
                }
            }
        }

        return false;
    }

    /**
    * Apply closeParent rules associated with given tag
    *
    * @param  Tag  $tag Tag
    * @return bool      Whether a new tag has been added
    */
    protected function closeParent(Tag $tag)
    {
        if (!empty($this->openTags))
        {
            $tagName   = $tag->getName();
            $tagConfig = $this->tagsConfig[$tagName];

            if (!empty($tagConfig['rules']['closeParent']))
            {
                $parent     = end($this->openTags);
                $parentName = $parent->getName();

                if (isset($tagConfig['rules']['closeParent'][$parentName]))
                {
                    ++$this->currentFixingCost;

                    // We have to close that parent. First we reinsert the tag...
                    $this->tagStack[] = $tag;

                    // ...then we add a new end tag for it with a better priority
                    $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

                    return true;
                }
            }
        }

        return false;
    }

    /**
    * Apply the createChild rules associated with given tag
    *
    * @param  Tag  $tag Tag
    * @return void
    */
    protected function createChild(Tag $tag)
    {
        $tagConfig = $this->tagsConfig[$tag->getName()];
        if (isset($tagConfig['rules']['createChild']))
        {
            $priority = -1000;
            $tagPos   = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
            foreach ($tagConfig['rules']['createChild'] as $tagName)
            {
                $this->addStartTag($tagName, $tagPos, 0, ++$priority);
            }
        }
    }

    /**
    * Apply fosterParent rules associated with given tag
    *
    * NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
    *       foster itself or two or more tags try to foster each other in a loop. We mitigate the
    *       risk by preventing a tag from creating a child of itself (the parent still gets closed)
    *       and by checking and increasing the currentFixingCost so that a loop of multiple tags
    *       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
    *       loop from running indefinitely
    *
    * @param  Tag  $tag Tag
    * @return bool      Whether a new tag has been added
    */
    protected function fosterParent(Tag $tag)
    {
        if (!empty($this->openTags))
        {
            $tagName   = $tag->getName();
            $tagConfig = $this->tagsConfig[$tagName];

            if (!empty($tagConfig['rules']['fosterParent']))
            {
                $parent     = end($this->openTags);
                $parentName = $parent->getName();

                if (isset($tagConfig['rules']['fosterParent'][$parentName]))
                {
                    if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
                    {
                        $this->addFosterTag($tag, $parent);
                    }

                    // Reinsert current tag
                    $this->tagStack[] = $tag;

                    // And finally close its parent with a priority that ensures it is processed
                    // before this tag
                    $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);

                    // Adjust the fixing cost to account for the additional tags/processing
                    $this->currentFixingCost += 4;

                    return true;
                }
            }
        }

        return false;
    }

    /**
    * Apply requireAncestor rules associated with given tag
    *
    * @param  Tag  $tag Tag
    * @return bool      Whether this tag has an unfulfilled requireAncestor requirement
    */
    protected function requireAncestor(Tag $tag)
    {
        $tagName   = $tag->getName();
        $tagConfig = $this->tagsConfig[$tagName];

        if (isset($tagConfig['rules']['requireAncestor']))
        {
            // Any one of the listed ancestors being open fulfills the requirement
            foreach ($tagConfig['rules']['requireAncestor'] as $ancestorName)
            {
                if (!empty($this->cntOpen[$ancestorName]))
                {
                    return false;
                }
            }

            $this->logger->err('Tag requires an ancestor', [
                'requireAncestor' => implode(',', $tagConfig['rules']['requireAncestor']),
                'tag'             => $tag
            ]);

            return true;
        }

        return false;
    }

    //==========================================================================
    // Tag processing
    //==========================================================================

    /**
    * Create and add a copy of a tag as a child of a given tag
    *
    * @param  Tag  $tag       Current tag
    * @param  Tag  $fosterTag Tag to foster
    * @return void
    */
    protected function addFosterTag(Tag $tag, Tag $fosterTag)
    {
        list($childPos, $childPrio) = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());

        // Add a 0-width copy of the parent tag after this tag and make it depend on this tag
        $childTag = $this->addCopyTag($fosterTag, $childPos, 0, $childPrio);
        $tag->cascadeInvalidationTo($childTag);
    }

    /**
    * Create and add an end tag for given start tag at given position
    *
    * @param  Tag     $startTag Start tag
    * @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
    * @param  integer $prio     End tag's priority
    * @return Tag
    */
    protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
    {
        $tagName = $startTag->getName();

        // Adjust the end tag's position if whitespace is to be minimized
        if (($this->currentTag->getFlags() | $startTag->getFlags()) & self::RULE_IGNORE_WHITESPACE)
        {
            $tagPos = $this->getMagicEndPos($tagPos);
        }

        // Add a 0-width end tag that is paired with the given start tag
        $endTag = $this->addEndTag($tagName, $tagPos, 0, $prio);
        $endTag->pairWith($startTag);

        return $endTag;
    }

    /**
    * Compute the position of a magic end tag, adjusted for whitespace
    *
    * @param  integer $tagPos Rightmost possible position for the tag
    * @return integer
    */
    protected function getMagicEndPos($tagPos)
    {
        // Back up from given position to the cursor's position until we find a character that
        // is not whitespace
        while ($tagPos > $this->pos && strpos(self::WHITESPACE, $this->text[$tagPos - 1]) !== false)
        {
            --$tagPos;
        }

        return $tagPos;
    }

    /**
    * Compute the position and priority of a magic start tag, adjusted for whitespace
    *
    * @param  integer   $tagPos Leftmost possible position for the tag
    * @return integer[]         [Tag pos, priority]
    */
    protected function getMagicStartCoords($tagPos)
    {
        if (empty($this->tagStack))
        {
            // Set the next position outside the text boundaries
            $nextPos  = $this->textLen + 1;
            $nextPrio = 0;
        }
        else
        {
            $nextTag  = end($this->tagStack);
            $nextPos  = $nextTag->getPos();
            $nextPrio = $nextTag->getSortPriority();
        }

        // Find the first non-whitespace position before next tag or the end of text. The scan is
        // bounded by the text's length because $nextPos can be set outside the text boundaries,
        // and reading past the last character would trigger an uninitialized offset error
        while ($tagPos < $nextPos
            && $tagPos < $this->textLen
            && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
        {
            ++$tagPos;
        }

        // Set a priority that ensures this tag appears before the next tag
        $prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;

        return [$tagPos, $prio];
    }

    /**
    * Test whether given start tag is immediately followed by a closing tag
    *
    * @param  Tag  $tag Start tag
    * @return bool
    */
    protected function isFollowedByClosingTag(Tag $tag)
    {
        return (empty($this->tagStack)) ? false : end($this->tagStack)->canClose($tag);
    }

    /**
    * Process all tags in the stack
    *
    * @return void
    */
    protected function processTags()
    {
        if (empty($this->tagStack))
        {
            return;
        }

        // Initialize the count tables
        foreach (array_keys($this->tagsConfig) as $tagName)
        {
            $this->cntOpen[$tagName]  = 0;
            $this->cntTotal[$tagName] = 0;
        }

        // Process the tag stack, close tags that were left open and repeat until done
        do
        {
            while (!empty($this->tagStack))
            {
                if (!$this->tagStackIsSorted)
                {
                    $this->sortTags();
                }

                $this->currentTag = array_pop($this->tagStack);
                $this->processCurrentTag();
            }

            // Close tags that were left open
            foreach ($this->openTags as $startTag)
            {
                // NOTE: we add tags in hierarchical order (ancestors to descendants) but since
                //       the stack is processed in LIFO order, it means that tags get closed in
                //       the correct order, from descendants to ancestors
                $this->addMagicEndTag($startTag, $this->textLen);
            }
        }
        while (!empty($this->tagStack));
    }

    /**
    * Process current tag
    *
    * @return void
    */
    protected function processCurrentTag()
    {
        // Invalidate current tag if tags are disabled and current tag would not close the last open
        // tag and is not a system tag
        if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
         && !$this->currentTag->canClose(end($this->openTags))
         && !$this->currentTag->isSystemTag())
        {
            $this->currentTag->invalidate();
        }

        $tagPos = $this->currentTag->getPos();
        $tagLen = $this->currentTag->getLen();

        // Test whether the cursor passed this tag's position already
        if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
        {
            // Test whether this tag is paired with a start tag and this tag is still open
            $startTag =
$this->currentTag->getStartTag(); 1336 1337 if ($startTag && in_array($startTag, $this->openTags, true)) 1338 { 1339 // Create an end tag that matches current tag's start tag, which consumes as much of 1340 // the same text as current tag and is paired with the same start tag 1341 $this->addEndTag( 1342 $startTag->getName(), 1343 $this->pos, 1344 max(0, $tagPos + $tagLen - $this->pos) 1345 )->pairWith($startTag); 1346 1347 // Note that current tag is not invalidated, it's merely replaced 1348 return; 1349 } 1350 1351 // If this is an ignore tag, try to ignore as much as the remaining text as possible 1352 if ($this->currentTag->isIgnoreTag()) 1353 { 1354 $ignoreLen = $tagPos + $tagLen - $this->pos; 1355 1356 if ($ignoreLen > 0) 1357 { 1358 // Create a new ignore tag and move on 1359 $this->addIgnoreTag($this->pos, $ignoreLen); 1360 1361 return; 1362 } 1363 } 1364 1365 // Skipped tags are invalidated 1366 $this->currentTag->invalidate(); 1367 } 1368 1369 if ($this->currentTag->isInvalid()) 1370 { 1371 return; 1372 } 1373 1374 if ($this->currentTag->isIgnoreTag()) 1375 { 1376 $this->outputIgnoreTag($this->currentTag); 1377 } 1378 elseif ($this->currentTag->isBrTag()) 1379 { 1380 // Output the tag if it's allowed, ignore it otherwise 1381 if (!($this->context['flags'] & self::RULE_PREVENT_BR)) 1382 { 1383 $this->outputBrTag($this->currentTag); 1384 } 1385 } 1386 elseif ($this->currentTag->isParagraphBreak()) 1387 { 1388 $this->outputText($this->currentTag->getPos(), 0, true); 1389 } 1390 elseif ($this->currentTag->isVerbatim()) 1391 { 1392 $this->outputVerbatim($this->currentTag); 1393 } 1394 elseif ($this->currentTag->isStartTag()) 1395 { 1396 $this->processStartTag($this->currentTag); 1397 } 1398 else 1399 { 1400 $this->processEndTag($this->currentTag); 1401 } 1402 } 1403 1404 /** 1405 * Process given start tag (including self-closing tags) at current position 1406 * 1407 * @param Tag $tag Start tag (including self-closing) 1408 * @return void 1409 */ 1410 protected 
function processStartTag(Tag $tag) 1411 { 1412 $tagName = $tag->getName(); 1413 $tagConfig = $this->tagsConfig[$tagName]; 1414 1415 // 1. Check that this tag has not reached its global limit tagLimit 1416 // 2. Execute this tag's filterChain, which will filter/validate its attributes 1417 // 3. Apply closeParent, closeAncestor and fosterParent rules 1418 // 4. Check for nestingLimit 1419 // 5. Apply requireAncestor rules 1420 // 1421 // This order ensures that the tag is valid and within the set limits before we attempt to 1422 // close parents or ancestors. We need to close ancestors before we can check for nesting 1423 // limits, whether this tag is allowed within current context (the context may change 1424 // as ancestors are closed) or whether the required ancestors are still there (they might 1425 // have been closed by a rule.) 1426 if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit']) 1427 { 1428 $this->logger->err( 1429 'Tag limit exceeded', 1430 [ 1431 'tag' => $tag, 1432 'tagName' => $tagName, 1433 'tagLimit' => $tagConfig['tagLimit'] 1434 ] 1435 ); 1436 $tag->invalidate(); 1437 1438 return; 1439 } 1440 1441 FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags); 1442 if ($tag->isInvalid()) 1443 { 1444 return; 1445 } 1446 1447 if ($this->currentFixingCost < $this->maxFixingCost) 1448 { 1449 if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag)) 1450 { 1451 // This tag parent/ancestor needs to be closed, we just return (the tag is still valid) 1452 return; 1453 } 1454 } 1455 1456 if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit']) 1457 { 1458 $this->logger->err( 1459 'Nesting limit exceeded', 1460 [ 1461 'tag' => $tag, 1462 'tagName' => $tagName, 1463 'nestingLimit' => $tagConfig['nestingLimit'] 1464 ] 1465 ); 1466 $tag->invalidate(); 1467 1468 return; 1469 } 1470 1471 if (!$this->tagIsAllowed($tagName)) 1472 { 1473 $msg = 'Tag is not allowed in this context'; 1474 $context = ['tag' => 
$tag, 'tagName' => $tagName]; 1475 if ($tag->getLen() > 0) 1476 { 1477 $this->logger->warn($msg, $context); 1478 } 1479 else 1480 { 1481 $this->logger->debug($msg, $context); 1482 } 1483 $tag->invalidate(); 1484 1485 return; 1486 } 1487 1488 if ($this->requireAncestor($tag)) 1489 { 1490 $tag->invalidate(); 1491 1492 return; 1493 } 1494 1495 // If this tag has an autoClose rule and it's not self-closed, paired with an end tag, or 1496 // immediately followed by an end tag, we replace it with a self-closing tag with the same 1497 // properties 1498 if ($tag->getFlags() & self::RULE_AUTO_CLOSE 1499 && !$tag->isSelfClosingTag() 1500 && !$tag->getEndTag() 1501 && !$this->isFollowedByClosingTag($tag)) 1502 { 1503 $newTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen()); 1504 $newTag->setAttributes($tag->getAttributes()); 1505 $newTag->setFlags($tag->getFlags()); 1506 1507 $tag = $newTag; 1508 } 1509 1510 if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE 1511 && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n") 1512 { 1513 $this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1); 1514 } 1515 1516 // This tag is valid, output it and update the context 1517 $this->outputTag($tag); 1518 $this->pushContext($tag); 1519 1520 // Apply the createChild rules if applicable 1521 $this->createChild($tag); 1522 } 1523 1524 /** 1525 * Process given end tag at current position 1526 * 1527 * @param Tag $tag end tag 1528 * @return void 1529 */ 1530 protected function processEndTag(Tag $tag) 1531 { 1532 $tagName = $tag->getName(); 1533 1534 if (empty($this->cntOpen[$tagName])) 1535 { 1536 // This is an end tag with no start tag 1537 return; 1538 } 1539 1540 /** 1541 * @var array List of tags need to be closed before given tag 1542 */ 1543 $closeTags = []; 1544 1545 // Iterate through all open tags from last to first to find a match for our tag 1546 $i = count($this->openTags); 1547 while (--$i >= 0) 1548 { 1549 $openTag = $this->openTags[$i]; 1550 
1551 if ($tag->canClose($openTag)) 1552 { 1553 break; 1554 } 1555 1556 $closeTags[] = $openTag; 1557 ++$this->currentFixingCost; 1558 } 1559 1560 if ($i < 0) 1561 { 1562 // Did not find a matching tag 1563 $this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]); 1564 1565 return; 1566 } 1567 1568 // Accumulate flags to determine whether whitespace should be trimmed 1569 $flags = $tag->getFlags(); 1570 foreach ($closeTags as $openTag) 1571 { 1572 $flags |= $openTag->getFlags(); 1573 } 1574 $ignoreWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE); 1575 1576 // Only reopen tags if we haven't exceeded our "fixing" budget 1577 $keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost); 1578 1579 // Iterate over tags that are being closed, output their end tag and collect tags to be 1580 // reopened 1581 $reopenTags = []; 1582 foreach ($closeTags as $openTag) 1583 { 1584 $openTagName = $openTag->getName(); 1585 1586 // Test whether this tag should be reopened automatically 1587 if ($keepReopening) 1588 { 1589 if ($openTag->getFlags() & self::RULE_AUTO_REOPEN) 1590 { 1591 $reopenTags[] = $openTag; 1592 } 1593 else 1594 { 1595 $keepReopening = false; 1596 } 1597 } 1598 1599 // Find the earliest position we can close this open tag 1600 $tagPos = $tag->getPos(); 1601 if ($ignoreWhitespace) 1602 { 1603 $tagPos = $this->getMagicEndPos($tagPos); 1604 } 1605 1606 // Output an end tag to close this start tag, then update the context 1607 $endTag = new Tag(Tag::END_TAG, $openTagName, $tagPos, 0); 1608 $endTag->setFlags($openTag->getFlags()); 1609 $this->outputTag($endTag); 1610 $this->popContext(); 1611 } 1612 1613 // Output our tag, moving the cursor past it, then update the context 1614 $this->outputTag($tag); 1615 $this->popContext(); 1616 1617 // If our fixing budget allows it, peek at upcoming tags and remove end tags that would 1618 // close tags that are already being closed now. 
Also, filter our list of tags being 1619 // reopened by removing those that would immediately be closed 1620 if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost) 1621 { 1622 /** 1623 * @var integer Rightmost position of the portion of text to ignore 1624 */ 1625 $ignorePos = $this->pos; 1626 1627 $i = count($this->tagStack); 1628 while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost) 1629 { 1630 $upcomingTag = $this->tagStack[$i]; 1631 1632 // Test whether the upcoming tag is positioned at current "ignore" position and it's 1633 // strictly an end tag (not a start tag or a self-closing tag) 1634 if ($upcomingTag->getPos() > $ignorePos 1635 || $upcomingTag->isStartTag()) 1636 { 1637 break; 1638 } 1639 1640 // Test whether this tag would close any of the tags we're about to reopen 1641 $j = count($closeTags); 1642 1643 while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost) 1644 { 1645 if ($upcomingTag->canClose($closeTags[$j])) 1646 { 1647 // Remove the tag from the lists and reset the keys 1648 array_splice($closeTags, $j, 1); 1649 1650 if (isset($reopenTags[$j])) 1651 { 1652 array_splice($reopenTags, $j, 1); 1653 } 1654 1655 // Extend the ignored text to cover this tag 1656 $ignorePos = max( 1657 $ignorePos, 1658 $upcomingTag->getPos() + $upcomingTag->getLen() 1659 ); 1660 1661 break; 1662 } 1663 } 1664 } 1665 1666 if ($ignorePos > $this->pos) 1667 { 1668 /** 1669 * @todo have a method that takes (pos,len) rather than a Tag 1670 */ 1671 $this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos)); 1672 } 1673 } 1674 1675 // Re-add tags that need to be reopened, at current cursor position 1676 foreach ($reopenTags as $startTag) 1677 { 1678 $newTag = $this->addCopyTag($startTag, $this->pos, 0); 1679 1680 // Re-pair the new tag 1681 $endTag = $startTag->getEndTag(); 1682 if ($endTag) 1683 { 1684 $newTag->pairWith($endTag); 1685 } 1686 } 1687 } 1688 1689 /** 1690 * Update counters 
and replace current context with its parent context 1691 * 1692 * @return void 1693 */ 1694 protected function popContext() 1695 { 1696 $tag = array_pop($this->openTags); 1697 --$this->cntOpen[$tag->getName()]; 1698 $this->context = $this->context['parentContext']; 1699 } 1700 1701 /** 1702 * Update counters and replace current context with a new context based on given tag 1703 * 1704 * If given tag is a self-closing tag, the context won't change 1705 * 1706 * @param Tag $tag Start tag (including self-closing) 1707 * @return void 1708 */ 1709 protected function pushContext(Tag $tag) 1710 { 1711 $tagName = $tag->getName(); 1712 $tagFlags = $tag->getFlags(); 1713 $tagConfig = $this->tagsConfig[$tagName]; 1714 1715 ++$this->cntTotal[$tagName]; 1716 1717 // If this is a self-closing tag, the context remains the same 1718 if ($tag->isSelfClosingTag()) 1719 { 1720 return; 1721 } 1722 1723 // Recompute the allowed tags 1724 $allowed = []; 1725 foreach ($this->context['allowed'] as $k => $v) 1726 { 1727 // If the current tag is not transparent, override the low bits (allowed children) of 1728 // current context with its high bits (allowed descendants) 1729 if (!($tagFlags & self::RULE_IS_TRANSPARENT)) 1730 { 1731 $v = ($v & 0xFF00) | ($v >> 8); 1732 } 1733 $allowed[] = $tagConfig['allowed'][$k] & $v; 1734 } 1735 1736 // Use this tag's flags as a base for this context and add inherited rules 1737 $flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE); 1738 1739 // RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR 1740 if ($flags & self::RULE_DISABLE_AUTO_BR) 1741 { 1742 $flags &= ~self::RULE_ENABLE_AUTO_BR; 1743 } 1744 1745 ++$this->cntOpen[$tagName]; 1746 $this->openTags[] = $tag; 1747 $this->context = [ 1748 'allowed' => $allowed, 1749 'flags' => $flags, 1750 'inParagraph' => false, 1751 'parentContext' => $this->context 1752 ]; 1753 } 1754 1755 /** 1756 * Return whether given tag is allowed in current context 1757 * 1758 * @param string $tagName 1759 * 
@return bool 1760 */ 1761 protected function tagIsAllowed($tagName) 1762 { 1763 $n = $this->tagsConfig[$tagName]['bitNumber']; 1764 1765 return (bool) ($this->context['allowed'][$n >> 3] & (1 << ($n & 7))); 1766 } 1767 1768 //========================================================================== 1769 // Tag stack 1770 //========================================================================== 1771 1772 /** 1773 * Add a start tag 1774 * 1775 * @param string $name Name of the tag 1776 * @param integer $pos Position of the tag in the text 1777 * @param integer $len Length of text consumed by the tag 1778 * @param integer $prio Tag's priority 1779 * @return Tag 1780 */ 1781 public function addStartTag($name, $pos, $len, $prio = 0) 1782 { 1783 return $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio); 1784 } 1785 1786 /** 1787 * Add an end tag 1788 * 1789 * @param string $name Name of the tag 1790 * @param integer $pos Position of the tag in the text 1791 * @param integer $len Length of text consumed by the tag 1792 * @param integer $prio Tag's priority 1793 * @return Tag 1794 */ 1795 public function addEndTag($name, $pos, $len, $prio = 0) 1796 { 1797 return $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio); 1798 } 1799 1800 /** 1801 * Add a self-closing tag 1802 * 1803 * @param string $name Name of the tag 1804 * @param integer $pos Position of the tag in the text 1805 * @param integer $len Length of text consumed by the tag 1806 * @param integer $prio Tag's priority 1807 * @return Tag 1808 */ 1809 public function addSelfClosingTag($name, $pos, $len, $prio = 0) 1810 { 1811 return $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio); 1812 } 1813 1814 /** 1815 * Add a 0-width "br" tag to force a line break at given position 1816 * 1817 * @param integer $pos Position of the tag in the text 1818 * @param integer $prio Tag's priority 1819 * @return Tag 1820 */ 1821 public function addBrTag($pos, $prio = 0) 1822 { 1823 return 
$this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio); 1824 } 1825 1826 /** 1827 * Add an "ignore" tag 1828 * 1829 * @param integer $pos Position of the tag in the text 1830 * @param integer $len Length of text consumed by the tag 1831 * @param integer $prio Tag's priority 1832 * @return Tag 1833 */ 1834 public function addIgnoreTag($pos, $len, $prio = 0) 1835 { 1836 return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, min($len, $this->textLen - $pos), $prio); 1837 } 1838 1839 /** 1840 * Add a paragraph break at given position 1841 * 1842 * Uses a zero-width tag that is actually never output in the result 1843 * 1844 * @param integer $pos Position of the tag in the text 1845 * @param integer $prio Tag's priority 1846 * @return Tag 1847 */ 1848 public function addParagraphBreak($pos, $prio = 0) 1849 { 1850 return $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio); 1851 } 1852 1853 /** 1854 * Add a copy of given tag at given position and length 1855 * 1856 * @param Tag $tag Original tag 1857 * @param integer $pos Copy's position 1858 * @param integer $len Copy's length 1859 * @param integer $prio Copy's priority (same as original by default) 1860 * @return Tag Copy tag 1861 */ 1862 public function addCopyTag(Tag $tag, $pos, $len, $prio = null) 1863 { 1864 if (!isset($prio)) 1865 { 1866 $prio = $tag->getSortPriority(); 1867 } 1868 $copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $prio); 1869 $copy->setAttributes($tag->getAttributes()); 1870 1871 return $copy; 1872 } 1873 1874 /** 1875 * Add a tag 1876 * 1877 * @param integer $type Tag's type 1878 * @param string $name Name of the tag 1879 * @param integer $pos Position of the tag in the text 1880 * @param integer $len Length of text consumed by the tag 1881 * @param integer $prio Tag's priority 1882 * @return Tag 1883 */ 1884 protected function addTag($type, $name, $pos, $len, $prio) 1885 { 1886 // Create the tag 1887 $tag = new Tag($type, $name, $pos, $len, $prio); 1888 1889 // Set 
this tag's rules bitfield 1890 if (isset($this->tagsConfig[$name])) 1891 { 1892 $tag->setFlags($this->tagsConfig[$name]['rules']['flags']); 1893 } 1894 1895 // Invalidate this tag if it's an unknown tag, a disabled tag, if either of its length or 1896 // position is negative or if it's out of bounds 1897 if ((!isset($this->tagsConfig[$name]) && !$tag->isSystemTag()) 1898 || $this->isInvalidTextSpan($pos, $len)) 1899 { 1900 $tag->invalidate(); 1901 } 1902 elseif (!empty($this->tagsConfig[$name]['isDisabled'])) 1903 { 1904 $this->logger->warn( 1905 'Tag is disabled', 1906 [ 1907 'tag' => $tag, 1908 'tagName' => $name 1909 ] 1910 ); 1911 $tag->invalidate(); 1912 } 1913 else 1914 { 1915 $this->insertTag($tag); 1916 } 1917 1918 return $tag; 1919 } 1920 1921 /** 1922 * Test whether given text span is outside text boundaries or an invalid UTF sequence 1923 * 1924 * @param integer $pos Start of text 1925 * @param integer $len Length of text 1926 * @return bool 1927 */ 1928 protected function isInvalidTextSpan($pos, $len) 1929 { 1930 return ($len < 0 || $pos < 0 || $pos + $len > $this->textLen || preg_match('([\\x80-\\xBF])', substr($this->text, $pos, 1) . 
substr($this->text, $pos + $len, 1))); 1931 } 1932 1933 /** 1934 * Insert given tag in the tag stack 1935 * 1936 * @param Tag $tag 1937 * @return void 1938 */ 1939 protected function insertTag(Tag $tag) 1940 { 1941 if (!$this->tagStackIsSorted) 1942 { 1943 $this->tagStack[] = $tag; 1944 } 1945 else 1946 { 1947 // Scan the stack and copy every tag to the next slot until we find the correct index 1948 $i = count($this->tagStack); 1949 $key = $this->getSortKey($tag); 1950 while ($i > 0 && $key > $this->getSortKey($this->tagStack[$i - 1])) 1951 { 1952 $this->tagStack[$i] = $this->tagStack[$i - 1]; 1953 --$i; 1954 } 1955 $this->tagStack[$i] = $tag; 1956 } 1957 } 1958 1959 /** 1960 * Add a pair of tags 1961 * 1962 * @param string $name Name of the tags 1963 * @param integer $startPos Position of the start tag 1964 * @param integer $startLen Length of the start tag 1965 * @param integer $endPos Position of the start tag 1966 * @param integer $endLen Length of the start tag 1967 * @param integer $prio Start tag's priority (the end tag will be set to minus that value) 1968 * @return Tag Start tag 1969 */ 1970 public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0) 1971 { 1972 // NOTE: the end tag is added first to try to keep the stack in the correct order 1973 $endTag = $this->addEndTag($name, $endPos, $endLen, -$prio); 1974 $startTag = $this->addStartTag($name, $startPos, $startLen, $prio); 1975 $startTag->pairWith($endTag); 1976 1977 return $startTag; 1978 } 1979 1980 /** 1981 * Add a tag that represents a verbatim copy of the original text 1982 * 1983 * @param integer $pos Position of the tag in the text 1984 * @param integer $len Length of text consumed by the tag 1985 * @param integer $prio Tag's priority 1986 * @return Tag 1987 */ 1988 public function addVerbatim($pos, $len, $prio = 0) 1989 { 1990 return $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio); 1991 } 1992 1993 /** 1994 * Sort tags by position and precedence 1995 * 
1996 * @return void 1997 */ 1998 protected function sortTags() 1999 { 2000 $arr = []; 2001 foreach ($this->tagStack as $i => $tag) 2002 { 2003 $key = $this->getSortKey($tag, $i); 2004 $arr[$key] = $tag; 2005 } 2006 krsort($arr); 2007 2008 $this->tagStack = array_values($arr); 2009 $this->tagStackIsSorted = true; 2010 } 2011 2012 /** 2013 * Generate a key for given tag that can be used to compare its position using lexical comparisons 2014 * 2015 * Tags are sorted by position first, then by priority, then by whether they consume any text, 2016 * then by length, and finally in order of their creation. 2017 * 2018 * The stack's array is in reverse order. Therefore, tags that appear at the start of the text 2019 * are at the end of the array. 2020 * 2021 * @param Tag $tag 2022 * @param integer $tagIndex 2023 * @return string 2024 */ 2025 protected function getSortKey(Tag $tag, int $tagIndex = 0): string 2026 { 2027 // Ensure that negative values are sorted correctly by flagging them and making them positive 2028 $prioFlag = ($tag->getSortPriority() >= 0); 2029 $prio = $tag->getSortPriority(); 2030 if (!$prioFlag) 2031 { 2032 $prio += (1 << 30); 2033 } 2034 2035 // Sort 0-width tags separately from the rest 2036 $lenFlag = ($tag->getLen() > 0); 2037 if ($lenFlag) 2038 { 2039 // Inverse their length so that longest matches are processed first 2040 $lenOrder = $this->textLen - $tag->getLen(); 2041 } 2042 else 2043 { 2044 // Sort self-closing tags in-between start tags and end tags to keep them outside of tag 2045 // pairs 2046 $order = [ 2047 Tag::END_TAG => 0, 2048 Tag::SELF_CLOSING_TAG => 1, 2049 Tag::START_TAG => 2 2050 ]; 2051 $lenOrder = $order[$tag->getType()]; 2052 } 2053 2054 return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $prioFlag, $prio, $lenFlag, $lenOrder, $tagIndex); 2055 } 2056 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |