[ Index ]

PHP Cross Reference of phpBB-3.3.12-deutsch

title

Body

[close]

/vendor/s9e/text-formatter/src/ -> Parser.php (source)

   1  <?php
   2  
   3  /**
   4  * @package   s9e\TextFormatter
   5  * @copyright Copyright (c) 2010-2022 The s9e authors
   6  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
   7  */
   8  namespace s9e\TextFormatter;
   9  
  10  use InvalidArgumentException;
  11  use RuntimeException;
  12  use s9e\TextFormatter\Parser\FilterProcessing;
  13  use s9e\TextFormatter\Parser\Logger;
  14  use s9e\TextFormatter\Parser\Tag;
  15  
  16  class Parser
  17  {
  18      /**#@+
  19      * Boolean rules bitfield
  20      */
  21      const RULE_AUTO_CLOSE        = 1 << 0;
  22      const RULE_AUTO_REOPEN       = 1 << 1;
  23      const RULE_BREAK_PARAGRAPH   = 1 << 2;
  24      const RULE_CREATE_PARAGRAPHS = 1 << 3;
  25      const RULE_DISABLE_AUTO_BR   = 1 << 4;
  26      const RULE_ENABLE_AUTO_BR    = 1 << 5;
  27      const RULE_IGNORE_TAGS       = 1 << 6;
  28      const RULE_IGNORE_TEXT       = 1 << 7;
  29      const RULE_IGNORE_WHITESPACE = 1 << 8;
  30      const RULE_IS_TRANSPARENT    = 1 << 9;
  31      const RULE_PREVENT_BR        = 1 << 10;
  32      const RULE_SUSPEND_AUTO_BR   = 1 << 11;
  33      const RULE_TRIM_FIRST_LINE   = 1 << 12;
  34      /**#@-*/
  35  
  36      /**
  37      * Bitwise disjunction of rules related to automatic line breaks
  38      */
  39      const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
  40  
  41      /**
  42      * Bitwise disjunction of rules that are inherited by subcontexts
  43      */
  44      const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
  45  
  46      /**
  47      * All the characters that are considered whitespace
  48      */
  49      const WHITESPACE = " \n\t";
  50  
  51      /**
  52      * @var array Number of open tags for each tag name
  53      */
  54      protected $cntOpen;
  55  
  56      /**
  57      * @var array Number of times each tag has been used
  58      */
  59      protected $cntTotal;
  60  
  61      /**
  62      * @var array Current context
  63      */
  64      protected $context;
  65  
  66      /**
  67      * @var integer How hard the parser has worked on fixing bad markup so far
  68      */
  69      protected $currentFixingCost;
  70  
  71      /**
  72      * @var Tag Current tag being processed
  73      */
  74      protected $currentTag;
  75  
  76      /**
  77      * @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
  78      */
  79      protected $isRich;
  80  
  81      /**
  82      * @var Logger This parser's logger
  83      */
  84      protected $logger;
  85  
  86      /**
  87      * @var integer How hard the parser should work on fixing bad markup
  88      */
  89      public $maxFixingCost = 10000;
  90  
  91      /**
  92      * @var array Associative array of namespace prefixes in use in document (prefixes used as key)
  93      */
  94      protected $namespaces;
  95  
  96      /**
  97      * @var array Stack of open tags (instances of Tag)
  98      */
  99      protected $openTags;
 100  
 101      /**
 102      * @var string This parser's output
 103      */
 104      protected $output;
 105  
 106      /**
 107      * @var integer Position of the cursor in the original text
 108      */
 109      protected $pos;
 110  
 111      /**
 112      * @var array Array of callbacks, using plugin names as keys
 113      */
 114      protected $pluginParsers = [];
 115  
 116      /**
 117      * @var array Associative array of [pluginName => pluginConfig]
 118      */
 119      protected $pluginsConfig;
 120  
 121      /**
 122      * @var array Variables registered for use in filters
 123      */
 124      public $registeredVars = [];
 125  
 126      /**
 127      * @var array Root context, used at the root of the document
 128      */
 129      protected $rootContext;
 130  
 131      /**
 132      * @var array Tags' config
 133      */
 134      protected $tagsConfig;
 135  
 136      /**
 137      * @var array Tag storage
 138      */
 139      protected $tagStack;
 140  
 141      /**
 142      * @var bool Whether the tags in the stack are sorted
 143      */
 144      protected $tagStackIsSorted;
 145  
 146      /**
 147      * @var string Text being parsed
 148      */
 149      protected $text;
 150  
 151      /**
 152      * @var integer Length of the text being parsed
 153      */
 154      protected $textLen;
 155  
 156      /**
  157      * @var integer Counter incremented every time the parser is reset. Used as a canary to detect
 158      *              whether the parser was reset during execution
 159      */
 160      protected $uid = 0;
 161  
 162      /**
 163      * @var integer Position before which we output text verbatim, without paragraphs or linebreaks
 164      */
 165      protected $wsPos;
 166  
 167      /**
 168      * Constructor
 169      */
 170  	public function __construct(array $config)
 171      {
 172          $this->pluginsConfig  = $config['plugins'];
 173          $this->registeredVars = $config['registeredVars'];
 174          $this->rootContext    = $config['rootContext'];
 175          $this->tagsConfig     = $config['tags'];
 176  
 177          $this->__wakeup();
 178      }
 179  
 180      /**
 181      * Serializer
 182      *
 183      * Returns the properties that need to persist through serialization.
 184      *
 185      * NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
 186      * of the serializer to the user (e.g. igbinary)
 187      *
 188      * @return array
 189      */
 190  	public function __sleep()
 191      {
 192          return ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];
 193      }
 194  
 195      /**
 196      * Unserializer
 197      *
 198      * @return void
 199      */
 200  	public function __wakeup()
 201      {
 202          $this->logger = new Logger;
 203      }
 204  
 205      /**
 206      * Reset the parser for a new parsing
 207      *
 208      * @param  string $text Text to be parsed
 209      * @return void
 210      */
 211  	protected function reset($text)
 212      {
 213          // Reject invalid UTF-8
 214          if (!preg_match('//u', $text))
 215          {
 216              throw new InvalidArgumentException('Invalid UTF-8 input');
 217          }
 218  
 219          // Normalize CR/CRLF to LF, remove control characters that aren't allowed in XML
 220          $text = preg_replace('/\\r\\n?/', "\n", $text);
 221          $text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $text);
 222  
 223          // Clear the logs
 224          $this->logger->clear();
 225  
 226          // Initialize the rest
 227          $this->cntOpen           = [];
 228          $this->cntTotal          = [];
 229          $this->currentFixingCost = 0;
 230          $this->currentTag        = null;
 231          $this->isRich            = false;
 232          $this->namespaces        = [];
 233          $this->openTags          = [];
 234          $this->output            = '';
 235          $this->pos               = 0;
 236          $this->tagStack          = [];
 237          $this->tagStackIsSorted  = false;
 238          $this->text              = $text;
 239          $this->textLen           = strlen($text);
 240          $this->wsPos             = 0;
 241  
 242          // Initialize the root context
 243          $this->context = $this->rootContext;
 244          $this->context['inParagraph'] = false;
 245  
 246          // Bump the UID
 247          ++$this->uid;
 248      }
 249  
 250      /**
 251      * Set a tag's option
 252      *
 253      * This method ensures that the tag's config is a value and not a reference, to prevent
 254      * potential side-effects. References contained *inside* the tag's config are left untouched
 255      *
 256      * @param  string $tagName     Tag's name
 257      * @param  string $optionName  Option's name
 258      * @param  mixed  $optionValue Option's value
 259      * @return void
 260      */
 261  	protected function setTagOption($tagName, $optionName, $optionValue)
 262      {
 263          if (isset($this->tagsConfig[$tagName]))
 264          {
 265              // Copy the tag's config and remove it. That will destroy the reference
 266              $tagConfig = $this->tagsConfig[$tagName];
 267              unset($this->tagsConfig[$tagName]);
 268  
 269              // Set the new value and replace the tag's config
 270              $tagConfig[$optionName]     = $optionValue;
 271              $this->tagsConfig[$tagName] = $tagConfig;
 272          }
 273      }
 274  
 275      //==========================================================================
 276      // Public API
 277      //==========================================================================
 278  
 279      /**
 280      * Disable a tag
 281      *
 282      * @param  string $tagName Name of the tag
 283      * @return void
 284      */
 285  	public function disableTag($tagName)
 286      {
 287          $this->setTagOption($tagName, 'isDisabled', true);
 288      }
 289  
 290      /**
 291      * Enable a tag
 292      *
 293      * @param  string $tagName Name of the tag
 294      * @return void
 295      */
 296  	public function enableTag($tagName)
 297      {
 298          if (isset($this->tagsConfig[$tagName]))
 299          {
 300              unset($this->tagsConfig[$tagName]['isDisabled']);
 301          }
 302      }
 303  
 304      /**
 305      * Get this parser's Logger instance
 306      *
 307      * @return Logger
 308      */
 309  	public function getLogger()
 310      {
 311          return $this->logger;
 312      }
 313  
 314      /**
 315      * Return the last text parsed
 316      *
 317      * This method returns the normalized text, which may be slightly different from the original
 318      * text in that EOLs are normalized to LF and other control codes are stripped. This method is
 319      * meant to be used in support of processing log entries, which contain offsets based on the
 320      * normalized text
 321      *
 322      * @see Parser::reset()
 323      *
 324      * @return string
 325      */
 326  	public function getText()
 327      {
 328          return $this->text;
 329      }
 330  
 331      /**
 332      * Parse a text
 333      *
 334      * @param  string $text Text to parse
 335      * @return string       XML representation
 336      */
 337  	public function parse($text)
 338      {
 339          // Reset the parser and save the uid
 340          $this->reset($text);
 341          $uid = $this->uid;
 342  
 343          // Do the heavy lifting
 344          $this->executePluginParsers();
 345          $this->processTags();
 346  
 347          // Finalize the document
 348          $this->finalizeOutput();
 349  
 350          // Check the uid in case a plugin or a filter reset the parser mid-execution
 351          if ($this->uid !== $uid)
 352          {
 353              throw new RuntimeException('The parser has been reset during execution');
 354          }
 355  
 356          // Log a warning if the fixing cost limit was exceeded
 357          if ($this->currentFixingCost > $this->maxFixingCost)
 358          {
 359              $this->logger->warn('Fixing cost limit exceeded');
 360          }
 361  
 362          return $this->output;
 363      }
 364  
 365      /**
 366      * Change a tag's tagLimit
 367      *
 368      * NOTE: the default tagLimit should generally be set during configuration instead
 369      *
 370      * @param  string  $tagName  The tag's name, in UPPERCASE
 371      * @param  integer $tagLimit
 372      * @return void
 373      */
 374  	public function setTagLimit($tagName, $tagLimit)
 375      {
 376          $this->setTagOption($tagName, 'tagLimit', $tagLimit);
 377      }
 378  
 379      /**
 380      * Change a tag's nestingLimit
 381      *
 382      * NOTE: the default nestingLimit should generally be set during configuration instead
 383      *
 384      * @param  string  $tagName      The tag's name, in UPPERCASE
 385      * @param  integer $nestingLimit
 386      * @return void
 387      */
 388  	public function setNestingLimit($tagName, $nestingLimit)
 389      {
 390          $this->setTagOption($tagName, 'nestingLimit', $nestingLimit);
 391      }
 392  
 393      //==========================================================================
 394      // Output handling
 395      //==========================================================================
 396  
 397      /**
 398      * Finalize the output by appending the rest of the unprocessed text and create the root node
 399      *
 400      * @return void
 401      */
 402  	protected function finalizeOutput()
 403      {
 404          // Output the rest of the text and close the last paragraph
 405          $this->outputText($this->textLen, 0, true);
 406  
 407          // Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs
 408          do
 409          {
 410              $this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $cnt);
 411          }
 412          while ($cnt > 0);
 413  
 414          // Merge consecutive <i> tags
 415          if (strpos($this->output, '</i><i>') !== false)
 416          {
 417              $this->output = str_replace('</i><i>', '', $this->output);
 418          }
 419  
 420          // Remove control characters from the output to ensure it's valid XML
 421          $this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);
 422  
 423          // Encode Unicode characters that are outside of the BMP
 424          $this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);
 425  
 426          // Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
 427          $tagName = ($this->isRich) ? 'r' : 't';
 428  
 429          // Prepare the root node with all the namespace declarations
 430          $tmp = '<' . $tagName;
 431          foreach (array_keys($this->namespaces) as $prefix)
 432          {
 433              $tmp .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
 434          }
 435  
 436          $this->output = $tmp . '>' . $this->output . '</' . $tagName . '>';
 437      }
 438  
 439      /**
 440      * Append a tag to the output
 441      *
 442      * @param  Tag  $tag Tag to append
 443      * @return void
 444      */
 445  	protected function outputTag(Tag $tag)
 446      {
 447          $this->isRich = true;
 448  
 449          $tagName  = $tag->getName();
 450          $tagPos   = $tag->getPos();
 451          $tagLen   = $tag->getLen();
 452          $tagFlags = $tag->getFlags();
 453  
 454          if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
 455          {
 456              $skipBefore = 1;
 457              $skipAfter  = ($tag->isEndTag()) ? 2 : 1;
 458          }
 459          else
 460          {
 461              $skipBefore = $skipAfter = 0;
 462          }
 463  
 464          // Current paragraph must end before the tag if:
 465          //  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
 466          //  - the tag is an end tag (but not self-closing)
 467          $closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));
 468  
 469          // Let the cursor catch up with this tag's position
 470          $this->outputText($tagPos, $skipBefore, $closeParagraph);
 471  
 472          // Capture the text consumed by the tag
 473          $tagText = ($tagLen)
 474                   ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
 475                   : '';
 476  
 477          // Output current tag
 478          if ($tag->isStartTag())
 479          {
 480              // Handle paragraphs before opening the tag
 481              if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
 482              {
 483                  $this->outputParagraphStart($tagPos);
 484              }
 485  
 486              // Record this tag's namespace, if applicable
 487              $colonPos = strpos($tagName, ':');
 488              if ($colonPos)
 489              {
 490                  $this->namespaces[substr($tagName, 0, $colonPos)] = 0;
 491              }
 492  
 493              // Open the start tag and add its attributes, but don't close the tag
 494              $this->output .= '<' . $tagName;
 495  
 496              // We output the attributes in lexical order. Helps canonicalizing the output and could
 497              // prove useful someday
 498              $attributes = $tag->getAttributes();
 499              ksort($attributes);
 500  
 501              foreach ($attributes as $attrName => $attrValue)
 502              {
 503                  $this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
 504              }
 505  
 506              if ($tag->isSelfClosingTag())
 507              {
 508                  if ($tagLen)
 509                  {
 510                      $this->output .= '>' . $tagText . '</' . $tagName . '>';
 511                  }
 512                  else
 513                  {
 514                      $this->output .= '/>';
 515                  }
 516              }
 517              elseif ($tagLen)
 518              {
 519                  $this->output .= '><s>' . $tagText . '</s>';
 520              }
 521              else
 522              {
 523                  $this->output .= '>';
 524              }
 525          }
 526          else
 527          {
 528              if ($tagLen)
 529              {
 530                  $this->output .= '<e>' . $tagText . '</e>';
 531              }
 532  
 533              $this->output .= '</' . $tagName . '>';
 534          }
 535  
 536          // Move the cursor past the tag
 537          $this->pos = $tagPos + $tagLen;
 538  
 539          // Skip newlines (no other whitespace) after this tag
 540          $this->wsPos = $this->pos;
 541          while ($skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
 542          {
 543              // Decrement the number of lines to skip
 544              --$skipAfter;
 545  
 546              // Move the cursor past the newline
 547              ++$this->wsPos;
 548          }
 549      }
 550  
 551      /**
 552      * Output the text between the cursor's position (included) and given position (not included)
 553      *
 554      * @param  integer $catchupPos     Position we're catching up to
 555      * @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
 556      * @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
 557      * @return void
 558      */
 559  	protected function outputText($catchupPos, $maxLines, $closeParagraph)
 560      {
 561          if ($closeParagraph)
 562          {
 563              if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
 564              {
 565                  $closeParagraph = false;
 566              }
 567              else
 568              {
 569                  // Ignore any number of lines at the end if we're closing a paragraph
 570                  $maxLines = -1;
 571              }
 572          }
 573  
 574          if ($this->pos >= $catchupPos)
 575          {
 576              // We're already there, close the paragraph if applicable and return
 577              if ($closeParagraph)
 578              {
 579                  $this->outputParagraphEnd();
 580              }
 581  
 582              return;
 583          }
 584  
 585          // Skip over previously identified whitespace if applicable
 586          if ($this->wsPos > $this->pos)
 587          {
 588              $skipPos       = min($catchupPos, $this->wsPos);
 589              $this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
 590              $this->pos     = $skipPos;
 591  
 592              if ($this->pos >= $catchupPos)
 593              {
 594                  // Skipped everything. Close the paragraph if applicable and return
 595                  if ($closeParagraph)
 596                  {
 597                      $this->outputParagraphEnd();
 598                  }
 599  
 600                  return;
 601              }
 602          }
 603  
 604          // Test whether we're even supposed to output anything
 605          if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
 606          {
 607              $catchupLen  = $catchupPos - $this->pos;
 608              $catchupText = substr($this->text, $this->pos, $catchupLen);
 609  
 610              // If the catchup text is not entirely composed of whitespace, we put it inside ignore
 611              // tags
 612              if (strspn($catchupText, " \n\t") < $catchupLen)
 613              {
 614                  $catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
 615              }
 616  
 617              $this->output .= $catchupText;
 618              $this->pos = $catchupPos;
 619  
 620              if ($closeParagraph)
 621              {
 622                  $this->outputParagraphEnd();
 623              }
 624  
 625              return;
 626          }
 627  
 628          // Compute the amount of text to ignore at the end of the output
 629          $ignorePos = $catchupPos;
 630          $ignoreLen = 0;
 631  
 632          // Ignore as many lines (including whitespace) as specified
 633          while ($maxLines && --$ignorePos >= $this->pos)
 634          {
 635              $c = $this->text[$ignorePos];
 636              if (strpos(self::WHITESPACE, $c) === false)
 637              {
 638                  break;
 639              }
 640  
 641              if ($c === "\n")
 642              {
 643                  --$maxLines;
 644              }
 645  
 646              ++$ignoreLen;
 647          }
 648  
 649          // Adjust $catchupPos to ignore the text at the end
 650          $catchupPos -= $ignoreLen;
 651  
 652          // Break down the text in paragraphs if applicable
 653          if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
 654          {
 655              if (!$this->context['inParagraph'])
 656              {
 657                  $this->outputWhitespace($catchupPos);
 658  
 659                  if ($catchupPos > $this->pos)
 660                  {
 661                      $this->outputParagraphStart($catchupPos);
 662                  }
 663              }
 664  
 665              // Look for a paragraph break in this text
 666              $pbPos = strpos($this->text, "\n\n", $this->pos);
 667  
 668              while ($pbPos !== false && $pbPos < $catchupPos)
 669              {
 670                  $this->outputText($pbPos, 0, true);
 671                  $this->outputParagraphStart($catchupPos);
 672  
 673                  $pbPos = strpos($this->text, "\n\n", $this->pos);
 674              }
 675          }
 676  
 677          // Capture, escape and output the text
 678          if ($catchupPos > $this->pos)
 679          {
 680              $catchupText = htmlspecialchars(
 681                  substr($this->text, $this->pos, $catchupPos - $this->pos),
 682                  ENT_NOQUOTES,
 683                  'UTF-8'
 684              );
 685  
 686              // Format line breaks if applicable
 687              if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
 688              {
 689                  $catchupText = str_replace("\n", "<br/>\n", $catchupText);
 690              }
 691  
 692              $this->output .= $catchupText;
 693          }
 694  
 695          // Close the paragraph if applicable
 696          if ($closeParagraph)
 697          {
 698              $this->outputParagraphEnd();
 699          }
 700  
 701          // Add the ignored text if applicable
 702          if ($ignoreLen)
 703          {
 704              $this->output .= substr($this->text, $catchupPos, $ignoreLen);
 705          }
 706  
 707          // Move the cursor past the text
 708          $this->pos = $catchupPos + $ignoreLen;
 709      }
 710  
 711      /**
 712      * Output a linebreak tag
 713      *
 714      * @param  Tag  $tag
 715      * @return void
 716      */
 717  	protected function outputBrTag(Tag $tag)
 718      {
 719          $this->outputText($tag->getPos(), 0, false);
 720          $this->output .= '<br/>';
 721      }
 722  
 723      /**
 724      * Output an ignore tag
 725      *
 726      * @param  Tag  $tag
 727      * @return void
 728      */
 729  	protected function outputIgnoreTag(Tag $tag)
 730      {
 731          $tagPos = $tag->getPos();
 732          $tagLen = $tag->getLen();
 733  
 734          // Capture the text to ignore
 735          $ignoreText = substr($this->text, $tagPos, $tagLen);
 736  
 737          // Catch up with the tag's position then output the tag
 738          $this->outputText($tagPos, 0, false);
 739          $this->output .= '<i>' . htmlspecialchars($ignoreText, ENT_NOQUOTES, 'UTF-8') . '</i>';
 740          $this->isRich = true;
 741  
 742          // Move the cursor past this tag
 743          $this->pos = $tagPos + $tagLen;
 744      }
 745  
 746      /**
 747      * Start a paragraph between current position and given position, if applicable
 748      *
 749      * @param  integer $maxPos Rightmost position at which the paragraph can be opened
 750      * @return void
 751      */
 752  	protected function outputParagraphStart($maxPos)
 753      {
 754          // Do nothing if we're already in a paragraph, or if we don't use paragraphs
 755          if ($this->context['inParagraph']
 756           || !($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
 757          {
 758              return;
 759          }
 760  
 761          // Output the whitespace between $this->pos and $maxPos if applicable
 762          $this->outputWhitespace($maxPos);
 763  
 764          // Open the paragraph, but only if it's not at the very end of the text
 765          if ($this->pos < $this->textLen)
 766          {
 767              $this->output .= '<p>';
 768              $this->context['inParagraph'] = true;
 769          }
 770      }
 771  
 772      /**
 773      * Close current paragraph at current position if applicable
 774      *
 775      * @return void
 776      */
 777  	protected function outputParagraphEnd()
 778      {
 779          // Do nothing if we're not in a paragraph
 780          if (!$this->context['inParagraph'])
 781          {
 782              return;
 783          }
 784  
 785          $this->output .= '</p>';
 786          $this->context['inParagraph'] = false;
 787      }
 788  
 789      /**
 790      * Output the content of a verbatim tag
 791      *
 792      * @param  Tag  $tag
 793      * @return void
 794      */
 795  	protected function outputVerbatim(Tag $tag)
 796      {
 797          $flags = $this->context['flags'];
 798          $this->context['flags'] = $tag->getFlags();
 799          $this->outputText($this->currentTag->getPos() + $this->currentTag->getLen(), 0, false);
 800          $this->context['flags'] = $flags;
 801      }
 802  
 803      /**
 804      * Skip as much whitespace after current position as possible
 805      *
 806      * @param  integer $maxPos Rightmost character to be skipped
 807      * @return void
 808      */
 809  	protected function outputWhitespace($maxPos)
 810      {
 811          if ($maxPos > $this->pos)
 812          {
 813              $spn = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
 814  
 815              if ($spn)
 816              {
 817                  $this->output .= substr($this->text, $this->pos, $spn);
 818                  $this->pos += $spn;
 819              }
 820          }
 821      }
 822  
 823      //==========================================================================
 824      // Plugins handling
 825      //==========================================================================
 826  
 827      /**
 828      * Disable a plugin
 829      *
 830      * @param  string $pluginName Name of the plugin
 831      * @return void
 832      */
 833  	public function disablePlugin($pluginName)
 834      {
 835          if (isset($this->pluginsConfig[$pluginName]))
 836          {
 837              // Copy the plugin's config to remove the reference
 838              $pluginConfig = $this->pluginsConfig[$pluginName];
 839              unset($this->pluginsConfig[$pluginName]);
 840  
 841              // Update the value and replace the plugin's config
 842              $pluginConfig['isDisabled'] = true;
 843              $this->pluginsConfig[$pluginName] = $pluginConfig;
 844          }
 845      }
 846  
 847      /**
 848      * Enable a plugin
 849      *
 850      * @param  string $pluginName Name of the plugin
 851      * @return void
 852      */
 853  	public function enablePlugin($pluginName)
 854      {
 855          if (isset($this->pluginsConfig[$pluginName]))
 856          {
 857              $this->pluginsConfig[$pluginName]['isDisabled'] = false;
 858          }
 859      }
 860  
 861      /**
 862      * Execute given plugin
 863      *
 864      * @param  string $pluginName Plugin's name
 865      * @return void
 866      */
 867  	protected function executePluginParser($pluginName)
 868      {
 869          $pluginConfig = $this->pluginsConfig[$pluginName];
 870          if (isset($pluginConfig['quickMatch']) && strpos($this->text, $pluginConfig['quickMatch']) === false)
 871          {
 872              return;
 873          }
 874  
 875          $matches = [];
 876          if (isset($pluginConfig['regexp'], $pluginConfig['regexpLimit']))
 877          {
 878              $matches = $this->getMatches($pluginConfig['regexp'], $pluginConfig['regexpLimit']);
 879              if (empty($matches))
 880              {
 881                  return;
 882              }
 883          }
 884  
 885          // Execute the plugin's parser, which will add tags via $this->addStartTag() and others
 886          call_user_func($this->getPluginParser($pluginName), $this->text, $matches);
 887      }
 888  
 889      /**
 890      * Execute all the plugins
 891      *
 892      * @return void
 893      */
 894  	protected function executePluginParsers()
 895      {
 896          foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
 897          {
 898              if (empty($pluginConfig['isDisabled']))
 899              {
 900                  $this->executePluginParser($pluginName);
 901              }
 902          }
 903      }
 904  
 905      /**
 906      * Execute given regexp and returns as many matches as given limit
 907      *
 908      * @param  string  $regexp
 909      * @param  integer $limit
 910      * @return array
 911      */
 912  	protected function getMatches($regexp, $limit)
 913      {
 914          $cnt = preg_match_all($regexp, $this->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
 915          if ($cnt > $limit)
 916          {
 917              $matches = array_slice($matches, 0, $limit);
 918          }
 919  
 920          return $matches;
 921      }
 922  
 923      /**
 924      * Get the cached callback for given plugin's parser
 925      *
 926      * @param  string $pluginName Plugin's name
 927      * @return callable
 928      */
 929  	protected function getPluginParser($pluginName)
 930      {
 931          // Cache a new instance of this plugin's parser if there isn't one already
 932          if (!isset($this->pluginParsers[$pluginName]))
 933          {
 934              $pluginConfig = $this->pluginsConfig[$pluginName];
 935              $className = (isset($pluginConfig['className']))
 936                         ? $pluginConfig['className']
 937                         : 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
 938  
 939              // Register the parser as a callback
 940              $this->pluginParsers[$pluginName] = [new $className($this, $pluginConfig), 'parse'];
 941          }
 942  
 943          return $this->pluginParsers[$pluginName];
 944      }
 945  
 946      /**
 947      * Register a parser
 948      *
 949      * Can be used to add a new parser with no plugin config, or pre-generate a parser for an
 950      * existing plugin
 951      *
 952      * @param  string   $pluginName
 953      * @param  callable $parser
 954      * @param  string   $regexp
 955      * @param  integer  $limit
 956      * @return void
 957      */
 958  	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
 959      {
 960          if (!is_callable($parser))
 961          {
 962              throw new InvalidArgumentException('Argument 1 passed to ' . __METHOD__ . ' must be a valid callback');
 963          }
 964          // Create an empty config for this plugin to ensure it is executed
 965          if (!isset($this->pluginsConfig[$pluginName]))
 966          {
 967              $this->pluginsConfig[$pluginName] = [];
 968          }
 969          if (isset($regexp))
 970          {
 971              $this->pluginsConfig[$pluginName]['regexp']      = $regexp;
 972              $this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
 973          }
 974          $this->pluginParsers[$pluginName] = $parser;
 975      }
 976  
 977      //==========================================================================
 978      // Rules handling
 979      //==========================================================================
 980  
 981      /**
 982      * Apply closeAncestor rules associated with given tag
 983      *
 984      * @param  Tag  $tag Tag
 985      * @return bool      Whether a new tag has been added
 986      */
 987  	protected function closeAncestor(Tag $tag)
 988      {
 989          if (!empty($this->openTags))
 990          {
 991              $tagName   = $tag->getName();
 992              $tagConfig = $this->tagsConfig[$tagName];
 993  
 994              if (!empty($tagConfig['rules']['closeAncestor']))
 995              {
 996                  $i = count($this->openTags);
 997  
 998                  while (--$i >= 0)
 999                  {
1000                      $ancestor     = $this->openTags[$i];
1001                      $ancestorName = $ancestor->getName();
1002  
1003                      if (isset($tagConfig['rules']['closeAncestor'][$ancestorName]))
1004                      {
1005                          ++$this->currentFixingCost;
1006  
1007                          // We have to close this ancestor. First we reinsert this tag...
1008                          $this->tagStack[] = $tag;
1009  
1010                          // ...then we add a new end tag for it with a better priority
1011                          $this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);
1012  
1013                          return true;
1014                      }
1015                  }
1016              }
1017          }
1018  
1019          return false;
1020      }
1021  
1022      /**
1023      * Apply closeParent rules associated with given tag
1024      *
1025      * @param  Tag  $tag Tag
1026      * @return bool      Whether a new tag has been added
1027      */
1028  	protected function closeParent(Tag $tag)
1029      {
1030          if (!empty($this->openTags))
1031          {
1032              $tagName   = $tag->getName();
1033              $tagConfig = $this->tagsConfig[$tagName];
1034  
1035              if (!empty($tagConfig['rules']['closeParent']))
1036              {
1037                  $parent     = end($this->openTags);
1038                  $parentName = $parent->getName();
1039  
1040                  if (isset($tagConfig['rules']['closeParent'][$parentName]))
1041                  {
1042                      ++$this->currentFixingCost;
1043  
1044                      // We have to close that parent. First we reinsert the tag...
1045                      $this->tagStack[] = $tag;
1046  
1047                      // ...then we add a new end tag for it with a better priority
1048                      $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1049  
1050                      return true;
1051                  }
1052              }
1053          }
1054  
1055          return false;
1056      }
1057  
1058      /**
1059      * Apply the createChild rules associated with given tag
1060      *
1061      * @param  Tag  $tag Tag
1062      * @return void
1063      */
1064  	protected function createChild(Tag $tag)
1065      {
1066          $tagConfig = $this->tagsConfig[$tag->getName()];
1067          if (isset($tagConfig['rules']['createChild']))
1068          {
1069              $priority = -1000;
1070              $tagPos   = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
1071              foreach ($tagConfig['rules']['createChild'] as $tagName)
1072              {
1073                  $this->addStartTag($tagName, $tagPos, 0, ++$priority);
1074              }
1075          }
1076      }
1077  
1078      /**
1079      * Apply fosterParent rules associated with given tag
1080      *
1081      * NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
1082      *       foster itself or two or more tags try to foster each other in a loop. We mitigate the
1083      *       risk by preventing a tag from creating a child of itself (the parent still gets closed)
1084      *       and by checking and increasing the currentFixingCost so that a loop of multiple tags
1085      *       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
1086      *       loop from running indefinitely
1087      *
1088      * @param  Tag  $tag Tag
1089      * @return bool      Whether a new tag has been added
1090      */
1091  	protected function fosterParent(Tag $tag)
1092      {
1093          if (!empty($this->openTags))
1094          {
1095              $tagName   = $tag->getName();
1096              $tagConfig = $this->tagsConfig[$tagName];
1097  
1098              if (!empty($tagConfig['rules']['fosterParent']))
1099              {
1100                  $parent     = end($this->openTags);
1101                  $parentName = $parent->getName();
1102  
1103                  if (isset($tagConfig['rules']['fosterParent'][$parentName]))
1104                  {
1105                      if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
1106                      {
1107                          $this->addFosterTag($tag, $parent);
1108                      }
1109  
1110                      // Reinsert current tag
1111                      $this->tagStack[] = $tag;
1112  
1113                      // And finally close its parent with a priority that ensures it is processed
1114                      // before this tag
1115                      $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1116  
1117                      // Adjust the fixing cost to account for the additional tags/processing
1118                      $this->currentFixingCost += 4;
1119  
1120                      return true;
1121                  }
1122              }
1123          }
1124  
1125          return false;
1126      }
1127  
1128      /**
1129      * Apply requireAncestor rules associated with given tag
1130      *
1131      * @param  Tag  $tag Tag
1132      * @return bool      Whether this tag has an unfulfilled requireAncestor requirement
1133      */
1134  	protected function requireAncestor(Tag $tag)
1135      {
1136          $tagName   = $tag->getName();
1137          $tagConfig = $this->tagsConfig[$tagName];
1138  
1139          if (isset($tagConfig['rules']['requireAncestor']))
1140          {
1141              foreach ($tagConfig['rules']['requireAncestor'] as $ancestorName)
1142              {
1143                  if (!empty($this->cntOpen[$ancestorName]))
1144                  {
1145                      return false;
1146                  }
1147              }
1148  
1149              $this->logger->err('Tag requires an ancestor', [
1150                  'requireAncestor' => implode(',', $tagConfig['rules']['requireAncestor']),
1151                  'tag'             => $tag
1152              ]);
1153  
1154              return true;
1155          }
1156  
1157          return false;
1158      }
1159  
1160      //==========================================================================
1161      // Tag processing
1162      //==========================================================================
1163  
1164      /**
1165      * Create and add a copy of a tag as a child of a given tag
1166      *
1167      * @param  Tag  $tag       Current tag
1168      * @param  Tag  $fosterTag Tag to foster
1169      * @return void
1170      */
1171  	protected function addFosterTag(Tag $tag, Tag $fosterTag)
1172      {
1173          list($childPos, $childPrio) = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());
1174  
1175          // Add a 0-width copy of the parent tag after this tag and make it depend on this tag
1176          $childTag = $this->addCopyTag($fosterTag, $childPos, 0, $childPrio);
1177          $tag->cascadeInvalidationTo($childTag);
1178      }
1179  
1180      /**
1181      * Create and add an end tag for given start tag at given position
1182      *
1183      * @param  Tag     $startTag Start tag
1184      * @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
1185      * @param  integer $prio     End tag's priority
1186      * @return Tag
1187      */
1188  	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
1189      {
1190          $tagName = $startTag->getName();
1191  
1192          // Adjust the end tag's position if whitespace is to be minimized
1193          if (($this->currentTag->getFlags() | $startTag->getFlags()) & self::RULE_IGNORE_WHITESPACE)
1194          {
1195              $tagPos = $this->getMagicEndPos($tagPos);
1196          }
1197  
1198          // Add a 0-width end tag that is paired with the given start tag
1199          $endTag = $this->addEndTag($tagName, $tagPos, 0, $prio);
1200          $endTag->pairWith($startTag);
1201  
1202          return $endTag;
1203      }
1204  
1205      /**
1206      * Compute the position of a magic end tag, adjusted for whitespace
1207      *
1208      * @param  integer $tagPos Rightmost possible position for the tag
1209      * @return integer
1210      */
1211  	protected function getMagicEndPos($tagPos)
1212      {
1213          // Back up from given position to the cursor's position until we find a character that
1214          // is not whitespace
1215          while ($tagPos > $this->pos && strpos(self::WHITESPACE, $this->text[$tagPos - 1]) !== false)
1216          {
1217              --$tagPos;
1218          }
1219  
1220          return $tagPos;
1221      }
1222  
1223      /**
1224      * Compute the position and priority of a magic start tag, adjusted for whitespace
1225      *
1226      * @param  integer   $tagPos Leftmost possible position for the tag
1227      * @return integer[]         [Tag pos, priority]
1228      */
1229  	protected function getMagicStartCoords($tagPos)
1230      {
1231          if (empty($this->tagStack))
1232          {
1233              // Set the next position outside the text boundaries
1234              $nextPos  = $this->textLen + 1;
1235              $nextPrio = 0;
1236          }
1237          else
1238          {
1239              $nextTag  = end($this->tagStack);
1240              $nextPos  = $nextTag->getPos();
1241              $nextPrio = $nextTag->getSortPriority();
1242          }
1243  
1244          // Find the first non-whitespace position before next tag or the end of text
1245          while ($tagPos < $nextPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
1246          {
1247              ++$tagPos;
1248          }
1249  
1250          // Set a priority that ensures this tag appears before the next tag
1251          $prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;
1252  
1253          return [$tagPos, $prio];
1254      }
1255  
1256      /**
1257      * Test whether given start tag is immediately followed by a closing tag
1258      *
1259      * @param  Tag  $tag Start tag
1260      * @return bool
1261      */
1262  	protected function isFollowedByClosingTag(Tag $tag)
1263      {
1264          return (empty($this->tagStack)) ? false : end($this->tagStack)->canClose($tag);
1265      }
1266  
1267      /**
1268      * Process all tags in the stack
1269      *
1270      * @return void
1271      */
1272  	protected function processTags()
1273      {
1274          if (empty($this->tagStack))
1275          {
1276              return;
1277          }
1278  
1279          // Initialize the count tables
1280          foreach (array_keys($this->tagsConfig) as $tagName)
1281          {
1282              $this->cntOpen[$tagName]  = 0;
1283              $this->cntTotal[$tagName] = 0;
1284          }
1285  
1286          // Process the tag stack, close tags that were left open and repeat until done
1287          do
1288          {
1289              while (!empty($this->tagStack))
1290              {
1291                  if (!$this->tagStackIsSorted)
1292                  {
1293                      $this->sortTags();
1294                  }
1295  
1296                  $this->currentTag = array_pop($this->tagStack);
1297                  $this->processCurrentTag();
1298              }
1299  
1300              // Close tags that were left open
1301              foreach ($this->openTags as $startTag)
1302              {
1303                  // NOTE: we add tags in hierarchical order (ancestors to descendants) but since
1304                  //       the stack is processed in LIFO order, it means that tags get closed in
1305                  //       the correct order, from descendants to ancestors
1306                  $this->addMagicEndTag($startTag, $this->textLen);
1307              }
1308          }
1309          while (!empty($this->tagStack));
1310      }
1311  
1312      /**
1313      * Process current tag
1314      *
1315      * @return void
1316      */
1317  	protected function processCurrentTag()
1318      {
1319          // Invalidate current tag if tags are disabled and current tag would not close the last open
1320          // tag and is not a system tag
1321          if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
1322           && !$this->currentTag->canClose(end($this->openTags))
1323           && !$this->currentTag->isSystemTag())
1324          {
1325              $this->currentTag->invalidate();
1326          }
1327  
1328          $tagPos = $this->currentTag->getPos();
1329          $tagLen = $this->currentTag->getLen();
1330  
1331          // Test whether the cursor passed this tag's position already
1332          if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
1333          {
1334              // Test whether this tag is paired with a start tag and this tag is still open
1335              $startTag = $this->currentTag->getStartTag();
1336  
1337              if ($startTag && in_array($startTag, $this->openTags, true))
1338              {
1339                  // Create an end tag that matches current tag's start tag, which consumes as much of
1340                  // the same text as current tag and is paired with the same start tag
1341                  $this->addEndTag(
1342                      $startTag->getName(),
1343                      $this->pos,
1344                      max(0, $tagPos + $tagLen - $this->pos)
1345                  )->pairWith($startTag);
1346  
1347                  // Note that current tag is not invalidated, it's merely replaced
1348                  return;
1349              }
1350  
1351              // If this is an ignore tag, try to ignore as much as the remaining text as possible
1352              if ($this->currentTag->isIgnoreTag())
1353              {
1354                  $ignoreLen = $tagPos + $tagLen - $this->pos;
1355  
1356                  if ($ignoreLen > 0)
1357                  {
1358                      // Create a new ignore tag and move on
1359                      $this->addIgnoreTag($this->pos, $ignoreLen);
1360  
1361                      return;
1362                  }
1363              }
1364  
1365              // Skipped tags are invalidated
1366              $this->currentTag->invalidate();
1367          }
1368  
1369          if ($this->currentTag->isInvalid())
1370          {
1371              return;
1372          }
1373  
1374          if ($this->currentTag->isIgnoreTag())
1375          {
1376              $this->outputIgnoreTag($this->currentTag);
1377          }
1378          elseif ($this->currentTag->isBrTag())
1379          {
1380              // Output the tag if it's allowed, ignore it otherwise
1381              if (!($this->context['flags'] & self::RULE_PREVENT_BR))
1382              {
1383                  $this->outputBrTag($this->currentTag);
1384              }
1385          }
1386          elseif ($this->currentTag->isParagraphBreak())
1387          {
1388              $this->outputText($this->currentTag->getPos(), 0, true);
1389          }
1390          elseif ($this->currentTag->isVerbatim())
1391          {
1392              $this->outputVerbatim($this->currentTag);
1393          }
1394          elseif ($this->currentTag->isStartTag())
1395          {
1396              $this->processStartTag($this->currentTag);
1397          }
1398          else
1399          {
1400              $this->processEndTag($this->currentTag);
1401          }
1402      }
1403  
1404      /**
1405      * Process given start tag (including self-closing tags) at current position
1406      *
1407      * @param  Tag  $tag Start tag (including self-closing)
1408      * @return void
1409      */
1410  	protected function processStartTag(Tag $tag)
1411      {
1412          $tagName   = $tag->getName();
1413          $tagConfig = $this->tagsConfig[$tagName];
1414  
1415          // 1. Check that this tag has not reached its global limit tagLimit
1416          // 2. Execute this tag's filterChain, which will filter/validate its attributes
1417          // 3. Apply closeParent, closeAncestor and fosterParent rules
1418          // 4. Check for nestingLimit
1419          // 5. Apply requireAncestor rules
1420          //
1421          // This order ensures that the tag is valid and within the set limits before we attempt to
1422          // close parents or ancestors. We need to close ancestors before we can check for nesting
1423          // limits, whether this tag is allowed within current context (the context may change
1424          // as ancestors are closed) or whether the required ancestors are still there (they might
1425          // have been closed by a rule.)
1426          if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
1427          {
1428              $this->logger->err(
1429                  'Tag limit exceeded',
1430                  [
1431                      'tag'      => $tag,
1432                      'tagName'  => $tagName,
1433                      'tagLimit' => $tagConfig['tagLimit']
1434                  ]
1435              );
1436              $tag->invalidate();
1437  
1438              return;
1439          }
1440  
1441          FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
1442          if ($tag->isInvalid())
1443          {
1444              return;
1445          }
1446  
1447          if ($this->currentFixingCost < $this->maxFixingCost)
1448          {
1449              if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag))
1450              {
1451                  // This tag parent/ancestor needs to be closed, we just return (the tag is still valid)
1452                  return;
1453              }
1454          }
1455  
1456          if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
1457          {
1458              $this->logger->err(
1459                  'Nesting limit exceeded',
1460                  [
1461                      'tag'          => $tag,
1462                      'tagName'      => $tagName,
1463                      'nestingLimit' => $tagConfig['nestingLimit']
1464                  ]
1465              );
1466              $tag->invalidate();
1467  
1468              return;
1469          }
1470  
1471          if (!$this->tagIsAllowed($tagName))
1472          {
1473              $msg     = 'Tag is not allowed in this context';
1474              $context = ['tag' => $tag, 'tagName' => $tagName];
1475              if ($tag->getLen() > 0)
1476              {
1477                  $this->logger->warn($msg, $context);
1478              }
1479              else
1480              {
1481                  $this->logger->debug($msg, $context);
1482              }
1483              $tag->invalidate();
1484  
1485              return;
1486          }
1487  
1488          if ($this->requireAncestor($tag))
1489          {
1490              $tag->invalidate();
1491  
1492              return;
1493          }
1494  
1495          // If this tag has an autoClose rule and it's not self-closed, paired with an end tag, or
1496          // immediately followed by an end tag, we replace it with a self-closing tag with the same
1497          // properties
1498          if ($tag->getFlags() & self::RULE_AUTO_CLOSE
1499           && !$tag->isSelfClosingTag()
1500           && !$tag->getEndTag()
1501           && !$this->isFollowedByClosingTag($tag))
1502          {
1503              $newTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
1504              $newTag->setAttributes($tag->getAttributes());
1505              $newTag->setFlags($tag->getFlags());
1506  
1507              $tag = $newTag;
1508          }
1509  
1510          if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE
1511           && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n")
1512          {
1513              $this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
1514          }
1515  
1516          // This tag is valid, output it and update the context
1517          $this->outputTag($tag);
1518          $this->pushContext($tag);
1519  
1520          // Apply the createChild rules if applicable
1521          $this->createChild($tag);
1522      }
1523  
1524      /**
1525      * Process given end tag at current position
1526      *
1527      * @param  Tag  $tag end tag
1528      * @return void
1529      */
1530  	protected function processEndTag(Tag $tag)
1531      {
1532          $tagName = $tag->getName();
1533  
1534          if (empty($this->cntOpen[$tagName]))
1535          {
1536              // This is an end tag with no start tag
1537              return;
1538          }
1539  
1540          /**
1541          * @var array List of tags need to be closed before given tag
1542          */
1543          $closeTags = [];
1544  
1545          // Iterate through all open tags from last to first to find a match for our tag
1546          $i = count($this->openTags);
1547          while (--$i >= 0)
1548          {
1549              $openTag = $this->openTags[$i];
1550  
1551              if ($tag->canClose($openTag))
1552              {
1553                  break;
1554              }
1555  
1556              $closeTags[] = $openTag;
1557              ++$this->currentFixingCost;
1558          }
1559  
1560          if ($i < 0)
1561          {
1562              // Did not find a matching tag
1563              $this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);
1564  
1565              return;
1566          }
1567  
1568          // Accumulate flags to determine whether whitespace should be trimmed
1569          $flags = $tag->getFlags();
1570          foreach ($closeTags as $openTag)
1571          {
1572              $flags |= $openTag->getFlags();
1573          }
1574          $ignoreWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE);
1575  
1576          // Only reopen tags if we haven't exceeded our "fixing" budget
1577          $keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost);
1578  
1579          // Iterate over tags that are being closed, output their end tag and collect tags to be
1580          // reopened
1581          $reopenTags = [];
1582          foreach ($closeTags as $openTag)
1583          {
1584              $openTagName = $openTag->getName();
1585  
1586              // Test whether this tag should be reopened automatically
1587              if ($keepReopening)
1588              {
1589                  if ($openTag->getFlags() & self::RULE_AUTO_REOPEN)
1590                  {
1591                      $reopenTags[] = $openTag;
1592                  }
1593                  else
1594                  {
1595                      $keepReopening = false;
1596                  }
1597              }
1598  
1599              // Find the earliest position we can close this open tag
1600              $tagPos = $tag->getPos();
1601              if ($ignoreWhitespace)
1602              {
1603                  $tagPos = $this->getMagicEndPos($tagPos);
1604              }
1605  
1606              // Output an end tag to close this start tag, then update the context
1607              $endTag = new Tag(Tag::END_TAG, $openTagName, $tagPos, 0);
1608              $endTag->setFlags($openTag->getFlags());
1609              $this->outputTag($endTag);
1610              $this->popContext();
1611          }
1612  
1613          // Output our tag, moving the cursor past it, then update the context
1614          $this->outputTag($tag);
1615          $this->popContext();
1616  
1617          // If our fixing budget allows it, peek at upcoming tags and remove end tags that would
1618          // close tags that are already being closed now. Also, filter our list of tags being
1619          // reopened by removing those that would immediately be closed
1620          if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
1621          {
1622              /**
1623              * @var integer Rightmost position of the portion of text to ignore
1624              */
1625              $ignorePos = $this->pos;
1626  
1627              $i = count($this->tagStack);
1628              while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
1629              {
1630                  $upcomingTag = $this->tagStack[$i];
1631  
1632                  // Test whether the upcoming tag is positioned at current "ignore" position and it's
1633                  // strictly an end tag (not a start tag or a self-closing tag)
1634                  if ($upcomingTag->getPos() > $ignorePos
1635                   || $upcomingTag->isStartTag())
1636                  {
1637                      break;
1638                  }
1639  
1640                  // Test whether this tag would close any of the tags we're about to reopen
1641                  $j = count($closeTags);
1642  
1643                  while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
1644                  {
1645                      if ($upcomingTag->canClose($closeTags[$j]))
1646                      {
1647                          // Remove the tag from the lists and reset the keys
1648                          array_splice($closeTags, $j, 1);
1649  
1650                          if (isset($reopenTags[$j]))
1651                          {
1652                              array_splice($reopenTags, $j, 1);
1653                          }
1654  
1655                          // Extend the ignored text to cover this tag
1656                          $ignorePos = max(
1657                              $ignorePos,
1658                              $upcomingTag->getPos() + $upcomingTag->getLen()
1659                          );
1660  
1661                          break;
1662                      }
1663                  }
1664              }
1665  
1666              if ($ignorePos > $this->pos)
1667              {
1668                  /**
1669                  * @todo have a method that takes (pos,len) rather than a Tag
1670                  */
1671                  $this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
1672              }
1673          }
1674  
1675          // Re-add tags that need to be reopened, at current cursor position
1676          foreach ($reopenTags as $startTag)
1677          {
1678              $newTag = $this->addCopyTag($startTag, $this->pos, 0);
1679  
1680              // Re-pair the new tag
1681              $endTag = $startTag->getEndTag();
1682              if ($endTag)
1683              {
1684                  $newTag->pairWith($endTag);
1685              }
1686          }
1687      }
1688  
1689      /**
1690      * Update counters and replace current context with its parent context
1691      *
1692      * @return void
1693      */
1694  	protected function popContext()
1695      {
1696          $tag = array_pop($this->openTags);
1697          --$this->cntOpen[$tag->getName()];
1698          $this->context = $this->context['parentContext'];
1699      }
1700  
1701      /**
1702      * Update counters and replace current context with a new context based on given tag
1703      *
1704      * If given tag is a self-closing tag, the context won't change
1705      *
1706      * @param  Tag  $tag Start tag (including self-closing)
1707      * @return void
1708      */
1709  	protected function pushContext(Tag $tag)
1710      {
1711          $tagName   = $tag->getName();
1712          $tagFlags  = $tag->getFlags();
1713          $tagConfig = $this->tagsConfig[$tagName];
1714  
1715          ++$this->cntTotal[$tagName];
1716  
1717          // If this is a self-closing tag, the context remains the same
1718          if ($tag->isSelfClosingTag())
1719          {
1720              return;
1721          }
1722  
1723          // Recompute the allowed tags
1724          $allowed = [];
1725          foreach ($this->context['allowed'] as $k => $v)
1726          {
1727              // If the current tag is not transparent, override the low bits (allowed children) of
1728              // current context with its high bits (allowed descendants)
1729              if (!($tagFlags & self::RULE_IS_TRANSPARENT))
1730              {
1731                  $v = ($v & 0xFF00) | ($v >> 8);
1732              }
1733              $allowed[] = $tagConfig['allowed'][$k] & $v;
1734          }
1735  
1736          // Use this tag's flags as a base for this context and add inherited rules
1737          $flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);
1738  
1739          // RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
1740          if ($flags & self::RULE_DISABLE_AUTO_BR)
1741          {
1742              $flags &= ~self::RULE_ENABLE_AUTO_BR;
1743          }
1744  
1745          ++$this->cntOpen[$tagName];
1746          $this->openTags[] = $tag;
1747          $this->context = [
1748              'allowed'       => $allowed,
1749              'flags'         => $flags,
1750              'inParagraph'   => false,
1751              'parentContext' => $this->context
1752          ];
1753      }
1754  
/**
* Test whether a tag of given name is allowed in the current context
*
* The tag's configured bit number is split into an index into the context's "allowed" list
* (bit number divided by 8) and a bit position within that entry (bit number modulo 8).
*
* @param  string $tagName
* @return bool
*/
protected function tagIsAllowed($tagName)
{
    $bitNumber = $this->tagsConfig[$tagName]['bitNumber'];
    $index     = $bitNumber >> 3;
    $mask      = 1 << ($bitNumber & 7);

    return (bool) ($this->context['allowed'][$index] & $mask);
}
1767  
1768      //==========================================================================
1769      // Tag stack
1770      //==========================================================================
1771  
/**
* Create a start tag and add it to the tag stack
*
* @param  string  $name Name of the tag
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag           The new tag, as returned by addTag()
*/
public function addStartTag($name, $pos, $len, $prio = 0)
{
    $startTag = $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);

    return $startTag;
}
1785  
/**
* Create an end tag and add it to the tag stack
*
* @param  string  $name Name of the tag
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag           The new tag, as returned by addTag()
*/
public function addEndTag($name, $pos, $len, $prio = 0)
{
    $endTag = $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);

    return $endTag;
}
1799  
/**
* Create a self-closing tag and add it to the tag stack
*
* @param  string  $name Name of the tag
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag           The new tag, as returned by addTag()
*/
public function addSelfClosingTag($name, $pos, $len, $prio = 0)
{
    $selfClosingTag = $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);

    return $selfClosingTag;
}
1813  
/**
* Force a line break at given position
*
* The line break is a self-closing tag named "br" that consumes no text (its length is 0).
*
* @param  integer $pos  Position of the tag in the text
* @param  integer $prio Tag's priority
* @return Tag
*/
public function addBrTag($pos, $prio = 0)
{
    $brTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);

    return $brTag;
}
1825  
/**
* Add an "ignore" tag
*
* The tag is a self-closing tag named "i". Its length is capped so that it never extends past
* the end of the text.
*
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag
*/
public function addIgnoreTag($pos, $len, $prio = 0)
{
    // Number of bytes between given position and the end of the text
    $remaining = $this->textLen - $pos;
    $cappedLen = min($len, $remaining);

    return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, $cappedLen, $prio);
}
1838  
/**
* Mark a paragraph break at given position
*
* The break is represented by a zero-width, self-closing tag named "pb" that is actually never
* output in the result.
*
* @param  integer $pos  Position of the tag in the text
* @param  integer $prio Tag's priority
* @return Tag
*/
public function addParagraphBreak($pos, $prio = 0)
{
    $breakTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);

    return $breakTag;
}
1852  
/**
* Add a copy of given tag at given position and length
*
* The copy has the same type, name and attributes as the original. Unless a priority is given,
* it also shares the original's sort priority.
*
* @param  Tag     $tag  Original tag
* @param  integer $pos  Copy's position
* @param  integer $len  Copy's length
* @param  integer $prio Copy's priority (same as original by default)
* @return Tag           Copy tag
*/
public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
{
    // Fall back to the original's priority if none was given
    $copyPrio = $prio ?? $tag->getSortPriority();

    $copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $copyPrio);
    $copy->setAttributes($tag->getAttributes());

    return $copy;
}
1873  
/**
* Create a tag and add it to the tag stack if it's valid
*
* The tag is always created and returned. It is invalidated instead of being inserted in the
* stack if its name is not configured and it is not a system tag, if its text span is invalid,
* or if the tag is disabled. Only the latter case is logged, as a warning.
*
* @param  integer $type Tag's type
* @param  string  $name Name of the tag
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag
*/
protected function addTag($type, $name, $pos, $len, $prio)
{
    $tag     = new Tag($type, $name, $pos, $len, $prio);
    $isKnown = isset($this->tagsConfig[$name]);

    // Configured tags get their rules bitfield from the configuration
    if ($isKnown)
    {
        $tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
    }

    // Unknown tags are only accepted if they are system tags, and the tag's position and length
    // must describe a valid span of text
    if ((!$isKnown && !$tag->isSystemTag()) || $this->isInvalidTextSpan($pos, $len))
    {
        $tag->invalidate();

        return $tag;
    }

    // Disabled tags are rejected with a warning
    if (!empty($this->tagsConfig[$name]['isDisabled']))
    {
        $context = [
            'tag'     => $tag,
            'tagName' => $name
        ];
        $this->logger->warn('Tag is disabled', $context);
        $tag->invalidate();

        return $tag;
    }

    $this->insertTag($tag);

    return $tag;
}
1920  
/**
* Test whether given text span is outside text boundaries or splits a UTF-8 sequence
*
* A span is invalid if its position or length is negative, if it ends past the end of the
* text, or if either its first byte or the byte that immediately follows it is in the range
* 0x80-0xBF (a UTF-8 continuation byte).
*
* @param  integer $pos Start of text
* @param  integer $len Length of text
* @return bool
*/
protected function isInvalidTextSpan($pos, $len)
{
    if ($len < 0 || $pos < 0 || $pos + $len > $this->textLen)
    {
        return true;
    }

    // Collect the byte at the start of the span and the byte right after its end
    $firstByte = substr($this->text, $pos, 1);
    $nextByte  = substr($this->text, $pos + $len, 1);

    return (bool) preg_match('([\\x80-\\xBF])', $firstByte . $nextByte);
}
1932  
/**
* Insert given tag in the tag stack
*
* If the stack is not sorted, the tag is simply appended to it. Otherwise, the tag is inserted
* at the position that keeps the stack ordered by sort key, highest key first.
*
* @param  Tag  $tag
* @return void
*/
protected function insertTag(Tag $tag)
{
    if (!$this->tagStackIsSorted)
    {
        $this->tagStack[] = $tag;

        return;
    }

    // Walk the stack backwards, moving each tag one slot up until we reach a tag whose key is
    // not lower than the new tag's key
    $newKey = $this->getSortKey($tag);
    for ($slot = count($this->tagStack); $slot > 0; --$slot)
    {
        $previous = $this->tagStack[$slot - 1];
        if (!($newKey > $this->getSortKey($previous)))
        {
            break;
        }
        $this->tagStack[$slot] = $previous;
    }
    $this->tagStack[$slot] = $tag;
}
1958  
/**
* Add a start tag and an end tag that are paired together
*
* @param  string  $name     Name of the tags
* @param  integer $startPos Position of the start tag
* @param  integer $startLen Length of the start tag
* @param  integer $endPos   Position of the end tag
* @param  integer $endLen   Length of the end tag
* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
* @return Tag               Start tag
*/
public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
{
    // NOTE: the end tag must be created before the start tag, it is an attempt at keeping the
    //       stack in the correct order
    $closer = $this->addEndTag($name, $endPos, $endLen, -$prio);
    $opener = $this->addStartTag($name, $startPos, $startLen, $prio);

    $opener->pairWith($closer);

    return $opener;
}
1979  
/**
* Add a tag that represents a verbatim copy of the original text
*
* The tag is a self-closing tag named "v".
*
* @param  integer $pos  Position of the tag in the text
* @param  integer $len  Length of text consumed by the tag
* @param  integer $prio Tag's priority
* @return Tag
*/
public function addVerbatim($pos, $len, $prio = 0)
{
    $verbatimTag = $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);

    return $verbatimTag;
}
1992  
/**
* Sort the tag stack by position and precedence
*
* Each tag is indexed by its sort key, which includes the tag's current index in the stack.
* The stack is then rebuilt in descending order of keys and flagged as sorted.
*
* @return void
*/
protected function sortTags()
{
    $tagsByKey = [];
    foreach ($this->tagStack as $index => $stackedTag)
    {
        $tagsByKey[$this->getSortKey($stackedTag, $index)] = $stackedTag;
    }

    // Highest keys first
    krsort($tagsByKey);

    $this->tagStackIsSorted = true;
    $this->tagStack         = array_values($tagsByKey);
}
2011  
/**
* Generate a key for given tag that can be used to compare its position using lexical comparisons
*
* The key is made of the following fields, in order: the tag's position, a flag set for
* non-negative priorities, the priority, a flag set for tags that consume text, a value that
* depends on the tag's length or type, and the tag's index. Numbers other than flags are
* formatted as hexadecimal values padded to 8 characters.
*
* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
* are at the end of the array.
*
* @param  Tag     $tag
* @param  integer $tagIndex
* @return string
*/
protected function getSortKey(Tag $tag, int $tagIndex = 0): string
{
    // Negative priorities are flagged and shifted into positive values so that they are sorted
    // correctly
    $prio     = $tag->getSortPriority();
    $prioFlag = ($prio >= 0);
    if (!$prioFlag)
    {
        $prio += (1 << 30);
    }

    // Tags that consume no text are sorted separately from the rest
    $len     = $tag->getLen();
    $lenFlag = ($len > 0);
    if (!$lenFlag)
    {
        // Self-closing tags are sorted in-between start tags and end tags to keep them outside
        // of tag pairs
        $typeOrder = [
            Tag::END_TAG          => 0,
            Tag::SELF_CLOSING_TAG => 1,
            Tag::START_TAG        => 2
        ];
        $lenOrder = $typeOrder[$tag->getType()];
    }
    else
    {
        // The length is inverted so that longest matches are processed first
        $lenOrder = $this->textLen - $len;
    }

    $fields = [$tag->getPos(), $prioFlag, $prio, $lenFlag, $lenOrder, $tagIndex];

    return vsprintf('%8x%d%8x%d%8x%8x', $fields);
}
2056  }


Generated: Sun Jun 23 12:25:44 2024 Cross-referenced by PHPXref 0.7.1