[ Index ]

PHP Cross Reference of phpBB-3.3.3-deutsch

title

Body

[close]

/vendor/s9e/text-formatter/src/ -> Parser.php (source)

   1  <?php
   2  
   3  /**
   4  * @package   s9e\TextFormatter
   5  * @copyright Copyright (c) 2010-2020 The s9e authors
   6  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
   7  */
   8  namespace s9e\TextFormatter;
   9  
  10  use InvalidArgumentException;
  11  use RuntimeException;
  12  use s9e\TextFormatter\Parser\FilterProcessing;
  13  use s9e\TextFormatter\Parser\Logger;
  14  use s9e\TextFormatter\Parser\Tag;
  15  
  16  class Parser
  17  {
  18      /**#@+
  19      * Boolean rules bitfield
  20      */
  21      const RULE_AUTO_CLOSE        = 1 << 0;
  22      const RULE_AUTO_REOPEN       = 1 << 1;
  23      const RULE_BREAK_PARAGRAPH   = 1 << 2;
  24      const RULE_CREATE_PARAGRAPHS = 1 << 3;
  25      const RULE_DISABLE_AUTO_BR   = 1 << 4;
  26      const RULE_ENABLE_AUTO_BR    = 1 << 5;
  27      const RULE_IGNORE_TAGS       = 1 << 6;
  28      const RULE_IGNORE_TEXT       = 1 << 7;
  29      const RULE_IGNORE_WHITESPACE = 1 << 8;
  30      const RULE_IS_TRANSPARENT    = 1 << 9;
  31      const RULE_PREVENT_BR        = 1 << 10;
  32      const RULE_SUSPEND_AUTO_BR   = 1 << 11;
  33      const RULE_TRIM_FIRST_LINE   = 1 << 12;
  34      /**#@-*/
  35  
  36      /**
  37      * Bitwise disjunction of rules related to automatic line breaks
  38      */
  39      const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
  40  
  41      /**
  42      * Bitwise disjunction of rules that are inherited by subcontexts
  43      */
  44      const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
  45  
  46      /**
  47      * All the characters that are considered whitespace
  48      */
  49      const WHITESPACE = " \n\t";
  50  
  51      /**
  52      * @var array Number of open tags for each tag name
  53      */
  54      protected $cntOpen;
  55  
  56      /**
  57      * @var array Number of times each tag has been used
  58      */
  59      protected $cntTotal;
  60  
  61      /**
  62      * @var array Current context
  63      */
  64      protected $context;
  65  
  66      /**
  67      * @var integer How hard the parser has worked on fixing bad markup so far
  68      */
  69      protected $currentFixingCost;
  70  
  71      /**
  72      * @var Tag Current tag being processed
  73      */
  74      protected $currentTag;
  75  
  76      /**
  77      * @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
  78      */
  79      protected $isRich;
  80  
  81      /**
  82      * @var Logger This parser's logger
  83      */
  84      protected $logger;
  85  
  86      /**
  87      * @var integer How hard the parser should work on fixing bad markup
  88      */
  89      public $maxFixingCost = 10000;
  90  
  91      /**
  92      * @var array Associative array of namespace prefixes in use in document (prefixes used as key)
  93      */
  94      protected $namespaces;
  95  
  96      /**
  97      * @var array Stack of open tags (instances of Tag)
  98      */
  99      protected $openTags;
 100  
 101      /**
 102      * @var string This parser's output
 103      */
 104      protected $output;
 105  
 106      /**
 107      * @var integer Position of the cursor in the original text
 108      */
 109      protected $pos;
 110  
 111      /**
 112      * @var array Array of callbacks, using plugin names as keys
 113      */
 114      protected $pluginParsers = [];
 115  
 116      /**
 117      * @var array Associative array of [pluginName => pluginConfig]
 118      */
 119      protected $pluginsConfig;
 120  
 121      /**
 122      * @var array Variables registered for use in filters
 123      */
 124      public $registeredVars = [];
 125  
 126      /**
 127      * @var array Root context, used at the root of the document
 128      */
 129      protected $rootContext;
 130  
 131      /**
 132      * @var array Tags' config
 133      */
 134      protected $tagsConfig;
 135  
 136      /**
 137      * @var array Tag storage
 138      */
 139      protected $tagStack;
 140  
 141      /**
 142      * @var bool Whether the tags in the stack are sorted
 143      */
 144      protected $tagStackIsSorted;
 145  
 146      /**
 147      * @var string Text being parsed
 148      */
 149      protected $text;
 150  
 151      /**
 152      * @var integer Length of the text being parsed
 153      */
 154      protected $textLen;
 155  
 156      /**
  157      * @var integer Counter incremented every time the parser is reset. Used as a canary to detect
 158      *              whether the parser was reset during execution
 159      */
 160      protected $uid = 0;
 161  
 162      /**
 163      * @var integer Position before which we output text verbatim, without paragraphs or linebreaks
 164      */
 165      protected $wsPos;
 166  
 167      /**
 168      * Constructor
 169      */
 170  	public function __construct(array $config)
 171      {
 172          $this->pluginsConfig  = $config['plugins'];
 173          $this->registeredVars = $config['registeredVars'];
 174          $this->rootContext    = $config['rootContext'];
 175          $this->tagsConfig     = $config['tags'];
 176  
 177          $this->__wakeup();
 178      }
 179  
 180      /**
 181      * Serializer
 182      *
 183      * Returns the properties that need to persist through serialization.
 184      *
 185      * NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
 186      * of the serializer to the user (e.g. igbinary)
 187      *
 188      * @return array
 189      */
 190  	public function __sleep()
 191      {
 192          return ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];
 193      }
 194  
 195      /**
 196      * Unserializer
 197      *
 198      * @return void
 199      */
 200  	public function __wakeup()
 201      {
 202          $this->logger = new Logger;
 203      }
 204  
 205      /**
 206      * Reset the parser for a new parsing
 207      *
 208      * @param  string $text Text to be parsed
 209      * @return void
 210      */
 211  	protected function reset($text)
 212      {
 213          // Reject invalid UTF-8
 214          if (!preg_match('//u', $text))
 215          {
 216              throw new InvalidArgumentException('Invalid UTF-8 input');
 217          }
 218  
 219          // Normalize CR/CRLF to LF, remove control characters that aren't allowed in XML
 220          $text = preg_replace('/\\r\\n?/', "\n", $text);
 221          $text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $text);
 222  
 223          // Clear the logs
 224          $this->logger->clear();
 225  
 226          // Initialize the rest
 227          $this->cntOpen           = [];
 228          $this->cntTotal          = [];
 229          $this->currentFixingCost = 0;
 230          $this->currentTag        = null;
 231          $this->isRich            = false;
 232          $this->namespaces        = [];
 233          $this->openTags          = [];
 234          $this->output            = '';
 235          $this->pos               = 0;
 236          $this->tagStack          = [];
 237          $this->tagStackIsSorted  = false;
 238          $this->text              = $text;
 239          $this->textLen           = strlen($text);
 240          $this->wsPos             = 0;
 241  
 242          // Initialize the root context
 243          $this->context = $this->rootContext;
 244          $this->context['inParagraph'] = false;
 245  
 246          // Bump the UID
 247          ++$this->uid;
 248      }
 249  
 250      /**
 251      * Set a tag's option
 252      *
 253      * This method ensures that the tag's config is a value and not a reference, to prevent
 254      * potential side-effects. References contained *inside* the tag's config are left untouched
 255      *
 256      * @param  string $tagName     Tag's name
 257      * @param  string $optionName  Option's name
 258      * @param  mixed  $optionValue Option's value
 259      * @return void
 260      */
 261  	protected function setTagOption($tagName, $optionName, $optionValue)
 262      {
 263          if (isset($this->tagsConfig[$tagName]))
 264          {
 265              // Copy the tag's config and remove it. That will destroy the reference
 266              $tagConfig = $this->tagsConfig[$tagName];
 267              unset($this->tagsConfig[$tagName]);
 268  
 269              // Set the new value and replace the tag's config
 270              $tagConfig[$optionName]     = $optionValue;
 271              $this->tagsConfig[$tagName] = $tagConfig;
 272          }
 273      }
 274  
 275      //==========================================================================
 276      // Public API
 277      //==========================================================================
 278  
 279      /**
 280      * Disable a tag
 281      *
 282      * @param  string $tagName Name of the tag
 283      * @return void
 284      */
 285  	public function disableTag($tagName)
 286      {
 287          $this->setTagOption($tagName, 'isDisabled', true);
 288      }
 289  
 290      /**
 291      * Enable a tag
 292      *
 293      * @param  string $tagName Name of the tag
 294      * @return void
 295      */
 296  	public function enableTag($tagName)
 297      {
 298          if (isset($this->tagsConfig[$tagName]))
 299          {
 300              unset($this->tagsConfig[$tagName]['isDisabled']);
 301          }
 302      }
 303  
 304      /**
 305      * Get this parser's Logger instance
 306      *
 307      * @return Logger
 308      */
 309  	public function getLogger()
 310      {
 311          return $this->logger;
 312      }
 313  
 314      /**
 315      * Return the last text parsed
 316      *
 317      * This method returns the normalized text, which may be slightly different from the original
 318      * text in that EOLs are normalized to LF and other control codes are stripped. This method is
 319      * meant to be used in support of processing log entries, which contain offsets based on the
 320      * normalized text
 321      *
 322      * @see Parser::reset()
 323      *
 324      * @return string
 325      */
 326  	public function getText()
 327      {
 328          return $this->text;
 329      }
 330  
 331      /**
 332      * Parse a text
 333      *
 334      * @param  string $text Text to parse
 335      * @return string       XML representation
 336      */
 337  	public function parse($text)
 338      {
 339          // Reset the parser and save the uid
 340          $this->reset($text);
 341          $uid = $this->uid;
 342  
 343          // Do the heavy lifting
 344          $this->executePluginParsers();
 345          $this->processTags();
 346  
 347          // Finalize the document
 348          $this->finalizeOutput();
 349  
 350          // Check the uid in case a plugin or a filter reset the parser mid-execution
 351          if ($this->uid !== $uid)
 352          {
 353              throw new RuntimeException('The parser has been reset during execution');
 354          }
 355  
 356          // Log a warning if the fixing cost limit was exceeded
 357          if ($this->currentFixingCost > $this->maxFixingCost)
 358          {
 359              $this->logger->warn('Fixing cost limit exceeded');
 360          }
 361  
 362          return $this->output;
 363      }
 364  
 365      /**
 366      * Change a tag's tagLimit
 367      *
 368      * NOTE: the default tagLimit should generally be set during configuration instead
 369      *
 370      * @param  string  $tagName  The tag's name, in UPPERCASE
 371      * @param  integer $tagLimit
 372      * @return void
 373      */
 374  	public function setTagLimit($tagName, $tagLimit)
 375      {
 376          $this->setTagOption($tagName, 'tagLimit', $tagLimit);
 377      }
 378  
 379      /**
 380      * Change a tag's nestingLimit
 381      *
 382      * NOTE: the default nestingLimit should generally be set during configuration instead
 383      *
 384      * @param  string  $tagName      The tag's name, in UPPERCASE
 385      * @param  integer $nestingLimit
 386      * @return void
 387      */
 388  	public function setNestingLimit($tagName, $nestingLimit)
 389      {
 390          $this->setTagOption($tagName, 'nestingLimit', $nestingLimit);
 391      }
 392  
 393      //==========================================================================
 394      // Output handling
 395      //==========================================================================
 396  
 397      /**
 398      * Finalize the output by appending the rest of the unprocessed text and create the root node
 399      *
 400      * @return void
 401      */
 402  	protected function finalizeOutput()
 403      {
 404          // Output the rest of the text and close the last paragraph
 405          $this->outputText($this->textLen, 0, true);
 406  
 407          // Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs
 408          do
 409          {
 410              $this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $cnt);
 411          }
 412          while ($cnt > 0);
 413  
 414          // Merge consecutive <i> tags
 415          if (strpos($this->output, '</i><i>') !== false)
 416          {
 417              $this->output = str_replace('</i><i>', '', $this->output);
 418          }
 419  
 420          // Remove control characters from the output to ensure it's valid XML
 421          $this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);
 422  
 423          // Encode Unicode characters that are outside of the BMP
 424          $this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);
 425  
 426          // Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
 427          $tagName = ($this->isRich) ? 'r' : 't';
 428  
 429          // Prepare the root node with all the namespace declarations
 430          $tmp = '<' . $tagName;
 431          foreach (array_keys($this->namespaces) as $prefix)
 432          {
 433              $tmp .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
 434          }
 435  
 436          $this->output = $tmp . '>' . $this->output . '</' . $tagName . '>';
 437      }
 438  
 439      /**
 440      * Append a tag to the output
 441      *
 442      * @param  Tag  $tag Tag to append
 443      * @return void
 444      */
 445  	protected function outputTag(Tag $tag)
 446      {
 447          $this->isRich = true;
 448  
 449          $tagName  = $tag->getName();
 450          $tagPos   = $tag->getPos();
 451          $tagLen   = $tag->getLen();
 452          $tagFlags = $tag->getFlags();
 453  
 454          if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
 455          {
 456              $skipBefore = 1;
 457              $skipAfter  = ($tag->isEndTag()) ? 2 : 1;
 458          }
 459          else
 460          {
 461              $skipBefore = $skipAfter = 0;
 462          }
 463  
 464          // Current paragraph must end before the tag if:
 465          //  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
 466          //  - the tag is an end tag (but not self-closing)
 467          $closeParagraph = false;
 468          if ($tag->isStartTag())
 469          {
 470              if ($tagFlags & self::RULE_BREAK_PARAGRAPH)
 471              {
 472                  $closeParagraph = true;
 473              }
 474          }
 475          else
 476          {
 477              $closeParagraph = true;
 478          }
 479  
 480          // Let the cursor catch up with this tag's position
 481          $this->outputText($tagPos, $skipBefore, $closeParagraph);
 482  
 483          // Capture the text consumed by the tag
 484          $tagText = ($tagLen)
 485                   ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
 486                   : '';
 487  
 488          // Output current tag
 489          if ($tag->isStartTag())
 490          {
 491              // Handle paragraphs before opening the tag
 492              if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
 493              {
 494                  $this->outputParagraphStart($tagPos);
 495              }
 496  
 497              // Record this tag's namespace, if applicable
 498              $colonPos = strpos($tagName, ':');
 499              if ($colonPos)
 500              {
 501                  $this->namespaces[substr($tagName, 0, $colonPos)] = 0;
 502              }
 503  
 504              // Open the start tag and add its attributes, but don't close the tag
 505              $this->output .= '<' . $tagName;
 506  
 507              // We output the attributes in lexical order. Helps canonicalizing the output and could
 508              // prove useful someday
 509              $attributes = $tag->getAttributes();
 510              ksort($attributes);
 511  
 512              foreach ($attributes as $attrName => $attrValue)
 513              {
 514                  $this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
 515              }
 516  
 517              if ($tag->isSelfClosingTag())
 518              {
 519                  if ($tagLen)
 520                  {
 521                      $this->output .= '>' . $tagText . '</' . $tagName . '>';
 522                  }
 523                  else
 524                  {
 525                      $this->output .= '/>';
 526                  }
 527              }
 528              elseif ($tagLen)
 529              {
 530                  $this->output .= '><s>' . $tagText . '</s>';
 531              }
 532              else
 533              {
 534                  $this->output .= '>';
 535              }
 536          }
 537          else
 538          {
 539              if ($tagLen)
 540              {
 541                  $this->output .= '<e>' . $tagText . '</e>';
 542              }
 543  
 544              $this->output .= '</' . $tagName . '>';
 545          }
 546  
 547          // Move the cursor past the tag
 548          $this->pos = $tagPos + $tagLen;
 549  
 550          // Skip newlines (no other whitespace) after this tag
 551          $this->wsPos = $this->pos;
 552          while ($skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
 553          {
 554              // Decrement the number of lines to skip
 555              --$skipAfter;
 556  
 557              // Move the cursor past the newline
 558              ++$this->wsPos;
 559          }
 560      }
 561  
 562      /**
 563      * Output the text between the cursor's position (included) and given position (not included)
 564      *
 565      * @param  integer $catchupPos     Position we're catching up to
 566      * @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
 567      * @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
 568      * @return void
 569      */
 570  	protected function outputText($catchupPos, $maxLines, $closeParagraph)
 571      {
 572          if ($closeParagraph)
 573          {
 574              if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
 575              {
 576                  $closeParagraph = false;
 577              }
 578              else
 579              {
 580                  // Ignore any number of lines at the end if we're closing a paragraph
 581                  $maxLines = -1;
 582              }
 583          }
 584  
 585          if ($this->pos >= $catchupPos)
 586          {
 587              // We're already there, close the paragraph if applicable and return
 588              if ($closeParagraph)
 589              {
 590                  $this->outputParagraphEnd();
 591              }
 592  
 593              return;
 594          }
 595  
 596          // Skip over previously identified whitespace if applicable
 597          if ($this->wsPos > $this->pos)
 598          {
 599              $skipPos       = min($catchupPos, $this->wsPos);
 600              $this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
 601              $this->pos     = $skipPos;
 602  
 603              if ($this->pos >= $catchupPos)
 604              {
 605                  // Skipped everything. Close the paragraph if applicable and return
 606                  if ($closeParagraph)
 607                  {
 608                      $this->outputParagraphEnd();
 609                  }
 610  
 611                  return;
 612              }
 613          }
 614  
 615          // Test whether we're even supposed to output anything
 616          if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
 617          {
 618              $catchupLen  = $catchupPos - $this->pos;
 619              $catchupText = substr($this->text, $this->pos, $catchupLen);
 620  
 621              // If the catchup text is not entirely composed of whitespace, we put it inside ignore
 622              // tags
 623              if (strspn($catchupText, " \n\t") < $catchupLen)
 624              {
 625                  $catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
 626              }
 627  
 628              $this->output .= $catchupText;
 629              $this->pos = $catchupPos;
 630  
 631              if ($closeParagraph)
 632              {
 633                  $this->outputParagraphEnd();
 634              }
 635  
 636              return;
 637          }
 638  
 639          // Compute the amount of text to ignore at the end of the output
 640          $ignorePos = $catchupPos;
 641          $ignoreLen = 0;
 642  
 643          // Ignore as many lines (including whitespace) as specified
 644          while ($maxLines && --$ignorePos >= $this->pos)
 645          {
 646              $c = $this->text[$ignorePos];
 647              if (strpos(self::WHITESPACE, $c) === false)
 648              {
 649                  break;
 650              }
 651  
 652              if ($c === "\n")
 653              {
 654                  --$maxLines;
 655              }
 656  
 657              ++$ignoreLen;
 658          }
 659  
 660          // Adjust $catchupPos to ignore the text at the end
 661          $catchupPos -= $ignoreLen;
 662  
 663          // Break down the text in paragraphs if applicable
 664          if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
 665          {
 666              if (!$this->context['inParagraph'])
 667              {
 668                  $this->outputWhitespace($catchupPos);
 669  
 670                  if ($catchupPos > $this->pos)
 671                  {
 672                      $this->outputParagraphStart($catchupPos);
 673                  }
 674              }
 675  
 676              // Look for a paragraph break in this text
 677              $pbPos = strpos($this->text, "\n\n", $this->pos);
 678  
 679              while ($pbPos !== false && $pbPos < $catchupPos)
 680              {
 681                  $this->outputText($pbPos, 0, true);
 682                  $this->outputParagraphStart($catchupPos);
 683  
 684                  $pbPos = strpos($this->text, "\n\n", $this->pos);
 685              }
 686          }
 687  
 688          // Capture, escape and output the text
 689          if ($catchupPos > $this->pos)
 690          {
 691              $catchupText = htmlspecialchars(
 692                  substr($this->text, $this->pos, $catchupPos - $this->pos),
 693                  ENT_NOQUOTES,
 694                  'UTF-8'
 695              );
 696  
 697              // Format line breaks if applicable
 698              if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
 699              {
 700                  $catchupText = str_replace("\n", "<br/>\n", $catchupText);
 701              }
 702  
 703              $this->output .= $catchupText;
 704          }
 705  
 706          // Close the paragraph if applicable
 707          if ($closeParagraph)
 708          {
 709              $this->outputParagraphEnd();
 710          }
 711  
 712          // Add the ignored text if applicable
 713          if ($ignoreLen)
 714          {
 715              $this->output .= substr($this->text, $catchupPos, $ignoreLen);
 716          }
 717  
 718          // Move the cursor past the text
 719          $this->pos = $catchupPos + $ignoreLen;
 720      }
 721  
 722      /**
 723      * Output a linebreak tag
 724      *
 725      * @param  Tag  $tag
 726      * @return void
 727      */
 728  	protected function outputBrTag(Tag $tag)
 729      {
 730          $this->outputText($tag->getPos(), 0, false);
 731          $this->output .= '<br/>';
 732      }
 733  
 734      /**
 735      * Output an ignore tag
 736      *
 737      * @param  Tag  $tag
 738      * @return void
 739      */
 740  	protected function outputIgnoreTag(Tag $tag)
 741      {
 742          $tagPos = $tag->getPos();
 743          $tagLen = $tag->getLen();
 744  
 745          // Capture the text to ignore
 746          $ignoreText = substr($this->text, $tagPos, $tagLen);
 747  
 748          // Catch up with the tag's position then output the tag
 749          $this->outputText($tagPos, 0, false);
 750          $this->output .= '<i>' . htmlspecialchars($ignoreText, ENT_NOQUOTES, 'UTF-8') . '</i>';
 751          $this->isRich = true;
 752  
 753          // Move the cursor past this tag
 754          $this->pos = $tagPos + $tagLen;
 755      }
 756  
 757      /**
 758      * Start a paragraph between current position and given position, if applicable
 759      *
 760      * @param  integer $maxPos Rightmost position at which the paragraph can be opened
 761      * @return void
 762      */
 763  	protected function outputParagraphStart($maxPos)
 764      {
 765          // Do nothing if we're already in a paragraph, or if we don't use paragraphs
 766          if ($this->context['inParagraph']
 767           || !($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
 768          {
 769              return;
 770          }
 771  
 772          // Output the whitespace between $this->pos and $maxPos if applicable
 773          $this->outputWhitespace($maxPos);
 774  
 775          // Open the paragraph, but only if it's not at the very end of the text
 776          if ($this->pos < $this->textLen)
 777          {
 778              $this->output .= '<p>';
 779              $this->context['inParagraph'] = true;
 780          }
 781      }
 782  
 783      /**
 784      * Close current paragraph at current position if applicable
 785      *
 786      * @return void
 787      */
 788  	protected function outputParagraphEnd()
 789      {
 790          // Do nothing if we're not in a paragraph
 791          if (!$this->context['inParagraph'])
 792          {
 793              return;
 794          }
 795  
 796          $this->output .= '</p>';
 797          $this->context['inParagraph'] = false;
 798      }
 799  
 800      /**
 801      * Output the content of a verbatim tag
 802      *
 803      * @param  Tag  $tag
 804      * @return void
 805      */
 806  	protected function outputVerbatim(Tag $tag)
 807      {
 808          $flags = $this->context['flags'];
 809          $this->context['flags'] = $tag->getFlags();
 810          $this->outputText($this->currentTag->getPos() + $this->currentTag->getLen(), 0, false);
 811          $this->context['flags'] = $flags;
 812      }
 813  
 814      /**
 815      * Skip as much whitespace after current position as possible
 816      *
 817      * @param  integer $maxPos Rightmost character to be skipped
 818      * @return void
 819      */
 820  	protected function outputWhitespace($maxPos)
 821      {
 822          if ($maxPos > $this->pos)
 823          {
 824              $spn = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
 825  
 826              if ($spn)
 827              {
 828                  $this->output .= substr($this->text, $this->pos, $spn);
 829                  $this->pos += $spn;
 830              }
 831          }
 832      }
 833  
 834      //==========================================================================
 835      // Plugins handling
 836      //==========================================================================
 837  
 838      /**
 839      * Disable a plugin
 840      *
 841      * @param  string $pluginName Name of the plugin
 842      * @return void
 843      */
 844  	public function disablePlugin($pluginName)
 845      {
 846          if (isset($this->pluginsConfig[$pluginName]))
 847          {
 848              // Copy the plugin's config to remove the reference
 849              $pluginConfig = $this->pluginsConfig[$pluginName];
 850              unset($this->pluginsConfig[$pluginName]);
 851  
 852              // Update the value and replace the plugin's config
 853              $pluginConfig['isDisabled'] = true;
 854              $this->pluginsConfig[$pluginName] = $pluginConfig;
 855          }
 856      }
 857  
 858      /**
 859      * Enable a plugin
 860      *
 861      * @param  string $pluginName Name of the plugin
 862      * @return void
 863      */
 864  	public function enablePlugin($pluginName)
 865      {
 866          if (isset($this->pluginsConfig[$pluginName]))
 867          {
 868              $this->pluginsConfig[$pluginName]['isDisabled'] = false;
 869          }
 870      }
 871  
 872      /**
 873      * Execute given plugin
 874      *
 875      * @param  string $pluginName Plugin's name
 876      * @return void
 877      */
 878  	protected function executePluginParser($pluginName)
 879      {
 880          $pluginConfig = $this->pluginsConfig[$pluginName];
 881          if (isset($pluginConfig['quickMatch']) && strpos($this->text, $pluginConfig['quickMatch']) === false)
 882          {
 883              return;
 884          }
 885  
 886          $matches = [];
 887          if (isset($pluginConfig['regexp'], $pluginConfig['regexpLimit']))
 888          {
 889              $matches = $this->getMatches($pluginConfig['regexp'], $pluginConfig['regexpLimit']);
 890              if (empty($matches))
 891              {
 892                  return;
 893              }
 894          }
 895  
 896          // Execute the plugin's parser, which will add tags via $this->addStartTag() and others
 897          call_user_func($this->getPluginParser($pluginName), $this->text, $matches);
 898      }
 899  
 900      /**
 901      * Execute all the plugins
 902      *
 903      * @return void
 904      */
 905  	protected function executePluginParsers()
 906      {
 907          foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
 908          {
 909              if (empty($pluginConfig['isDisabled']))
 910              {
 911                  $this->executePluginParser($pluginName);
 912              }
 913          }
 914      }
 915  
 916      /**
 917      * Execute given regexp and returns as many matches as given limit
 918      *
 919      * @param  string  $regexp
 920      * @param  integer $limit
 921      * @return array
 922      */
 923  	protected function getMatches($regexp, $limit)
 924      {
 925          $cnt = preg_match_all($regexp, $this->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
 926          if ($cnt > $limit)
 927          {
 928              $matches = array_slice($matches, 0, $limit);
 929          }
 930  
 931          return $matches;
 932      }
 933  
 934      /**
 935      * Get the cached callback for given plugin's parser
 936      *
 937      * @param  string $pluginName Plugin's name
 938      * @return callable
 939      */
 940  	protected function getPluginParser($pluginName)
 941      {
 942          // Cache a new instance of this plugin's parser if there isn't one already
 943          if (!isset($this->pluginParsers[$pluginName]))
 944          {
 945              $pluginConfig = $this->pluginsConfig[$pluginName];
 946              $className = (isset($pluginConfig['className']))
 947                         ? $pluginConfig['className']
 948                         : 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
 949  
 950              // Register the parser as a callback
 951              $this->pluginParsers[$pluginName] = [new $className($this, $pluginConfig), 'parse'];
 952          }
 953  
 954          return $this->pluginParsers[$pluginName];
 955      }
 956  
 957      /**
 958      * Register a parser
 959      *
 960      * Can be used to add a new parser with no plugin config, or pre-generate a parser for an
 961      * existing plugin
 962      *
 963      * @param  string   $pluginName
 964      * @param  callback $parser
 965      * @param  string   $regexp
 966      * @param  integer  $limit
 967      * @return void
 968      */
 969  	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
 970      {
 971          if (!is_callable($parser))
 972          {
 973              throw new InvalidArgumentException('Argument 1 passed to ' . __METHOD__ . ' must be a valid callback');
 974          }
 975          // Create an empty config for this plugin to ensure it is executed
 976          if (!isset($this->pluginsConfig[$pluginName]))
 977          {
 978              $this->pluginsConfig[$pluginName] = [];
 979          }
 980          if (isset($regexp))
 981          {
 982              $this->pluginsConfig[$pluginName]['regexp']      = $regexp;
 983              $this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
 984          }
 985          $this->pluginParsers[$pluginName] = $parser;
 986      }
 987  
 988      //==========================================================================
 989      // Rules handling
 990      //==========================================================================
 991  
 992      /**
 993      * Apply closeAncestor rules associated with given tag
 994      *
 995      * @param  Tag  $tag Tag
 996      * @return bool      Whether a new tag has been added
 997      */
 998  	protected function closeAncestor(Tag $tag)
 999      {
1000          if (!empty($this->openTags))
1001          {
1002              $tagName   = $tag->getName();
1003              $tagConfig = $this->tagsConfig[$tagName];
1004  
1005              if (!empty($tagConfig['rules']['closeAncestor']))
1006              {
1007                  $i = count($this->openTags);
1008  
1009                  while (--$i >= 0)
1010                  {
1011                      $ancestor     = $this->openTags[$i];
1012                      $ancestorName = $ancestor->getName();
1013  
1014                      if (isset($tagConfig['rules']['closeAncestor'][$ancestorName]))
1015                      {
1016                          ++$this->currentFixingCost;
1017  
1018                          // We have to close this ancestor. First we reinsert this tag...
1019                          $this->tagStack[] = $tag;
1020  
1021                          // ...then we add a new end tag for it with a better priority
1022                          $this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);
1023  
1024                          return true;
1025                      }
1026                  }
1027              }
1028          }
1029  
1030          return false;
1031      }
1032  
1033      /**
1034      * Apply closeParent rules associated with given tag
1035      *
1036      * @param  Tag  $tag Tag
1037      * @return bool      Whether a new tag has been added
1038      */
1039  	protected function closeParent(Tag $tag)
1040      {
1041          if (!empty($this->openTags))
1042          {
1043              $tagName   = $tag->getName();
1044              $tagConfig = $this->tagsConfig[$tagName];
1045  
1046              if (!empty($tagConfig['rules']['closeParent']))
1047              {
1048                  $parent     = end($this->openTags);
1049                  $parentName = $parent->getName();
1050  
1051                  if (isset($tagConfig['rules']['closeParent'][$parentName]))
1052                  {
1053                      ++$this->currentFixingCost;
1054  
1055                      // We have to close that parent. First we reinsert the tag...
1056                      $this->tagStack[] = $tag;
1057  
1058                      // ...then we add a new end tag for it with a better priority
1059                      $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1060  
1061                      return true;
1062                  }
1063              }
1064          }
1065  
1066          return false;
1067      }
1068  
1069      /**
1070      * Apply the createChild rules associated with given tag
1071      *
1072      * @param  Tag  $tag Tag
1073      * @return void
1074      */
1075  	protected function createChild(Tag $tag)
1076      {
1077          $tagConfig = $this->tagsConfig[$tag->getName()];
1078          if (isset($tagConfig['rules']['createChild']))
1079          {
1080              $priority = -1000;
1081              $tagPos   = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
1082              foreach ($tagConfig['rules']['createChild'] as $tagName)
1083              {
1084                  $this->addStartTag($tagName, $tagPos, 0, ++$priority);
1085              }
1086          }
1087      }
1088  
1089      /**
1090      * Apply fosterParent rules associated with given tag
1091      *
1092      * NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
1093      *       foster itself or two or more tags try to foster each other in a loop. We mitigate the
1094      *       risk by preventing a tag from creating a child of itself (the parent still gets closed)
1095      *       and by checking and increasing the currentFixingCost so that a loop of multiple tags
1096      *       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
1097      *       loop from running indefinitely
1098      *
1099      * @param  Tag  $tag Tag
1100      * @return bool      Whether a new tag has been added
1101      */
1102  	protected function fosterParent(Tag $tag)
1103      {
1104          if (!empty($this->openTags))
1105          {
1106              $tagName   = $tag->getName();
1107              $tagConfig = $this->tagsConfig[$tagName];
1108  
1109              if (!empty($tagConfig['rules']['fosterParent']))
1110              {
1111                  $parent     = end($this->openTags);
1112                  $parentName = $parent->getName();
1113  
1114                  if (isset($tagConfig['rules']['fosterParent'][$parentName]))
1115                  {
1116                      if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
1117                      {
1118                          $this->addFosterTag($tag, $parent);
1119                      }
1120  
1121                      // Reinsert current tag
1122                      $this->tagStack[] = $tag;
1123  
1124                      // And finally close its parent with a priority that ensures it is processed
1125                      // before this tag
1126                      $this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1127  
1128                      // Adjust the fixing cost to account for the additional tags/processing
1129                      $this->currentFixingCost += 4;
1130  
1131                      return true;
1132                  }
1133              }
1134          }
1135  
1136          return false;
1137      }
1138  
1139      /**
1140      * Apply requireAncestor rules associated with given tag
1141      *
1142      * @param  Tag  $tag Tag
1143      * @return bool      Whether this tag has an unfulfilled requireAncestor requirement
1144      */
1145  	protected function requireAncestor(Tag $tag)
1146      {
1147          $tagName   = $tag->getName();
1148          $tagConfig = $this->tagsConfig[$tagName];
1149  
1150          if (isset($tagConfig['rules']['requireAncestor']))
1151          {
1152              foreach ($tagConfig['rules']['requireAncestor'] as $ancestorName)
1153              {
1154                  if (!empty($this->cntOpen[$ancestorName]))
1155                  {
1156                      return false;
1157                  }
1158              }
1159  
1160              $this->logger->err('Tag requires an ancestor', [
1161                  'requireAncestor' => implode(',', $tagConfig['rules']['requireAncestor']),
1162                  'tag'             => $tag
1163              ]);
1164  
1165              return true;
1166          }
1167  
1168          return false;
1169      }
1170  
1171      //==========================================================================
1172      // Tag processing
1173      //==========================================================================
1174  
1175      /**
1176      * Create and add a copy of a tag as a child of a given tag
1177      *
1178      * @param  Tag  $tag       Current tag
1179      * @param  Tag  $fosterTag Tag to foster
1180      * @return void
1181      */
1182  	protected function addFosterTag(Tag $tag, Tag $fosterTag)
1183      {
1184          list($childPos, $childPrio) = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());
1185  
1186          // Add a 0-width copy of the parent tag after this tag and make it depend on this tag
1187          $childTag = $this->addCopyTag($fosterTag, $childPos, 0, $childPrio);
1188          $tag->cascadeInvalidationTo($childTag);
1189      }
1190  
1191      /**
1192      * Create and add an end tag for given start tag at given position
1193      *
1194      * @param  Tag     $startTag Start tag
1195      * @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
1196      * @param  integer $prio     End tag's priority
1197      * @return Tag
1198      */
1199  	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
1200      {
1201          $tagName = $startTag->getName();
1202  
1203          // Adjust the end tag's position if whitespace is to be minimized
1204          if (($this->currentTag->getFlags() | $startTag->getFlags()) & self::RULE_IGNORE_WHITESPACE)
1205          {
1206              $tagPos = $this->getMagicEndPos($tagPos);
1207          }
1208  
1209          // Add a 0-width end tag that is paired with the given start tag
1210          $endTag = $this->addEndTag($tagName, $tagPos, 0, $prio);
1211          $endTag->pairWith($startTag);
1212  
1213          return $endTag;
1214      }
1215  
1216      /**
1217      * Compute the position of a magic end tag, adjusted for whitespace
1218      *
1219      * @param  integer $tagPos Rightmost possible position for the tag
1220      * @return integer
1221      */
1222  	protected function getMagicEndPos($tagPos)
1223      {
1224          // Back up from given position to the cursor's position until we find a character that
1225          // is not whitespace
1226          while ($tagPos > $this->pos && strpos(self::WHITESPACE, $this->text[$tagPos - 1]) !== false)
1227          {
1228              --$tagPos;
1229          }
1230  
1231          return $tagPos;
1232      }
1233  
1234      /**
1235      * Compute the position and priority of a magic start tag, adjusted for whitespace
1236      *
1237      * @param  integer   $tagPos Leftmost possible position for the tag
1238      * @return integer[]         [Tag pos, priority]
1239      */
1240  	protected function getMagicStartCoords($tagPos)
1241      {
1242          if (empty($this->tagStack))
1243          {
1244              // Set the next position outside the text boundaries
1245              $nextPos  = $this->textLen + 1;
1246              $nextPrio = 0;
1247          }
1248          else
1249          {
1250              $nextTag  = end($this->tagStack);
1251              $nextPos  = $nextTag->getPos();
1252              $nextPrio = $nextTag->getSortPriority();
1253          }
1254  
1255          // Find the first non-whitespace position before next tag or the end of text
1256          while ($tagPos < $nextPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
1257          {
1258              ++$tagPos;
1259          }
1260  
1261          // Set a priority that ensures this tag appears before the next tag
1262          $prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;
1263  
1264          return [$tagPos, $prio];
1265      }
1266  
1267      /**
1268      * Test whether given start tag is immediately followed by a closing tag
1269      *
1270      * @param  Tag  $tag Start tag
1271      * @return bool
1272      */
1273  	protected function isFollowedByClosingTag(Tag $tag)
1274      {
1275          return (empty($this->tagStack)) ? false : end($this->tagStack)->canClose($tag);
1276      }
1277  
1278      /**
1279      * Process all tags in the stack
1280      *
1281      * @return void
1282      */
1283  	protected function processTags()
1284      {
1285          if (empty($this->tagStack))
1286          {
1287              return;
1288          }
1289  
1290          // Initialize the count tables
1291          foreach (array_keys($this->tagsConfig) as $tagName)
1292          {
1293              $this->cntOpen[$tagName]  = 0;
1294              $this->cntTotal[$tagName] = 0;
1295          }
1296  
1297          // Process the tag stack, close tags that were left open and repeat until done
1298          do
1299          {
1300              while (!empty($this->tagStack))
1301              {
1302                  if (!$this->tagStackIsSorted)
1303                  {
1304                      $this->sortTags();
1305                  }
1306  
1307                  $this->currentTag = array_pop($this->tagStack);
1308                  $this->processCurrentTag();
1309              }
1310  
1311              // Close tags that were left open
1312              foreach ($this->openTags as $startTag)
1313              {
1314                  // NOTE: we add tags in hierarchical order (ancestors to descendants) but since
1315                  //       the stack is processed in LIFO order, it means that tags get closed in
1316                  //       the correct order, from descendants to ancestors
1317                  $this->addMagicEndTag($startTag, $this->textLen);
1318              }
1319          }
1320          while (!empty($this->tagStack));
1321      }
1322  
1323      /**
1324      * Process current tag
1325      *
1326      * @return void
1327      */
1328  	protected function processCurrentTag()
1329      {
1330          // Invalidate current tag if tags are disabled and current tag would not close the last open
1331          // tag and is not a system tag
1332          if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
1333           && !$this->currentTag->canClose(end($this->openTags))
1334           && !$this->currentTag->isSystemTag())
1335          {
1336              $this->currentTag->invalidate();
1337          }
1338  
1339          $tagPos = $this->currentTag->getPos();
1340          $tagLen = $this->currentTag->getLen();
1341  
1342          // Test whether the cursor passed this tag's position already
1343          if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
1344          {
1345              // Test whether this tag is paired with a start tag and this tag is still open
1346              $startTag = $this->currentTag->getStartTag();
1347  
1348              if ($startTag && in_array($startTag, $this->openTags, true))
1349              {
1350                  // Create an end tag that matches current tag's start tag, which consumes as much of
1351                  // the same text as current tag and is paired with the same start tag
1352                  $this->addEndTag(
1353                      $startTag->getName(),
1354                      $this->pos,
1355                      max(0, $tagPos + $tagLen - $this->pos)
1356                  )->pairWith($startTag);
1357  
1358                  // Note that current tag is not invalidated, it's merely replaced
1359                  return;
1360              }
1361  
1362              // If this is an ignore tag, try to ignore as much as the remaining text as possible
1363              if ($this->currentTag->isIgnoreTag())
1364              {
1365                  $ignoreLen = $tagPos + $tagLen - $this->pos;
1366  
1367                  if ($ignoreLen > 0)
1368                  {
1369                      // Create a new ignore tag and move on
1370                      $this->addIgnoreTag($this->pos, $ignoreLen);
1371  
1372                      return;
1373                  }
1374              }
1375  
1376              // Skipped tags are invalidated
1377              $this->currentTag->invalidate();
1378          }
1379  
1380          if ($this->currentTag->isInvalid())
1381          {
1382              return;
1383          }
1384  
1385          if ($this->currentTag->isIgnoreTag())
1386          {
1387              $this->outputIgnoreTag($this->currentTag);
1388          }
1389          elseif ($this->currentTag->isBrTag())
1390          {
1391              // Output the tag if it's allowed, ignore it otherwise
1392              if (!($this->context['flags'] & self::RULE_PREVENT_BR))
1393              {
1394                  $this->outputBrTag($this->currentTag);
1395              }
1396          }
1397          elseif ($this->currentTag->isParagraphBreak())
1398          {
1399              $this->outputText($this->currentTag->getPos(), 0, true);
1400          }
1401          elseif ($this->currentTag->isVerbatim())
1402          {
1403              $this->outputVerbatim($this->currentTag);
1404          }
1405          elseif ($this->currentTag->isStartTag())
1406          {
1407              $this->processStartTag($this->currentTag);
1408          }
1409          else
1410          {
1411              $this->processEndTag($this->currentTag);
1412          }
1413      }
1414  
1415      /**
1416      * Process given start tag (including self-closing tags) at current position
1417      *
1418      * @param  Tag  $tag Start tag (including self-closing)
1419      * @return void
1420      */
1421  	protected function processStartTag(Tag $tag)
1422      {
1423          $tagName   = $tag->getName();
1424          $tagConfig = $this->tagsConfig[$tagName];
1425  
1426          // 1. Check that this tag has not reached its global limit tagLimit
1427          // 2. Execute this tag's filterChain, which will filter/validate its attributes
1428          // 3. Apply closeParent, closeAncestor and fosterParent rules
1429          // 4. Check for nestingLimit
1430          // 5. Apply requireAncestor rules
1431          //
1432          // This order ensures that the tag is valid and within the set limits before we attempt to
1433          // close parents or ancestors. We need to close ancestors before we can check for nesting
1434          // limits, whether this tag is allowed within current context (the context may change
1435          // as ancestors are closed) or whether the required ancestors are still there (they might
1436          // have been closed by a rule.)
1437          if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
1438          {
1439              $this->logger->err(
1440                  'Tag limit exceeded',
1441                  [
1442                      'tag'      => $tag,
1443                      'tagName'  => $tagName,
1444                      'tagLimit' => $tagConfig['tagLimit']
1445                  ]
1446              );
1447              $tag->invalidate();
1448  
1449              return;
1450          }
1451  
1452          FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
1453          if ($tag->isInvalid())
1454          {
1455              return;
1456          }
1457  
1458          if ($this->currentFixingCost < $this->maxFixingCost)
1459          {
1460              if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag))
1461              {
1462                  // This tag parent/ancestor needs to be closed, we just return (the tag is still valid)
1463                  return;
1464              }
1465          }
1466  
1467          if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
1468          {
1469              $this->logger->err(
1470                  'Nesting limit exceeded',
1471                  [
1472                      'tag'          => $tag,
1473                      'tagName'      => $tagName,
1474                      'nestingLimit' => $tagConfig['nestingLimit']
1475                  ]
1476              );
1477              $tag->invalidate();
1478  
1479              return;
1480          }
1481  
1482          if (!$this->tagIsAllowed($tagName))
1483          {
1484              $msg     = 'Tag is not allowed in this context';
1485              $context = ['tag' => $tag, 'tagName' => $tagName];
1486              if ($tag->getLen() > 0)
1487              {
1488                  $this->logger->warn($msg, $context);
1489              }
1490              else
1491              {
1492                  $this->logger->debug($msg, $context);
1493              }
1494              $tag->invalidate();
1495  
1496              return;
1497          }
1498  
1499          if ($this->requireAncestor($tag))
1500          {
1501              $tag->invalidate();
1502  
1503              return;
1504          }
1505  
1506          // If this tag has an autoClose rule and it's not self-closed, paired with an end tag, or
1507          // immediately followed by an end tag, we replace it with a self-closing tag with the same
1508          // properties
1509          if ($tag->getFlags() & self::RULE_AUTO_CLOSE
1510           && !$tag->isSelfClosingTag()
1511           && !$tag->getEndTag()
1512           && !$this->isFollowedByClosingTag($tag))
1513          {
1514              $newTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
1515              $newTag->setAttributes($tag->getAttributes());
1516              $newTag->setFlags($tag->getFlags());
1517  
1518              $tag = $newTag;
1519          }
1520  
1521          if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE
1522           && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n")
1523          {
1524              $this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
1525          }
1526  
1527          // This tag is valid, output it and update the context
1528          $this->outputTag($tag);
1529          $this->pushContext($tag);
1530  
1531          // Apply the createChild rules if applicable
1532          $this->createChild($tag);
1533      }
1534  
1535      /**
1536      * Process given end tag at current position
1537      *
1538      * @param  Tag  $tag end tag
1539      * @return void
1540      */
    protected function processEndTag(Tag $tag)
    {
        // Overview: find the open tag that this end tag closes, close every tag that was opened
        // after it, output this end tag, drop upcoming end tags made redundant by those forced
        // closings, then reopen the closed tags that carry the autoReopen rule
        $tagName = $tag->getName();

        if (empty($this->cntOpen[$tagName]))
        {
            // This is an end tag with no start tag
            return;
        }

        /**
        * @var array List of tags need to be closed before given tag
        */
        $closeTags = [];

        // Iterate through all open tags from last to first to find a match for our tag
        $i = count($this->openTags);
        while (--$i >= 0)
        {
            $openTag = $this->openTags[$i];

            if ($tag->canClose($openTag))
            {
                break;
            }

            // Every tag that has to be closed along the way adds to the fixing cost
            $closeTags[] = $openTag;
            ++$this->currentFixingCost;
        }

        // $i is negative if the loop above ran through every open tag without a match
        if ($i < 0)
        {
            // Did not find a matching tag
            $this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);

            return;
        }

        // Accumulate flags to determine whether whitespace should be trimmed
        $flags = $tag->getFlags();
        foreach ($closeTags as $openTag)
        {
            $flags |= $openTag->getFlags();
        }
        $ignoreWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE);

        // Only reopen tags if we haven't exceeded our "fixing" budget
        $keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost);

        // Iterate over tags that are being closed, output their end tag and collect tags to be
        // reopened
        $reopenTags = [];
        foreach ($closeTags as $openTag)
        {
            $openTagName = $openTag->getName();

            // Test whether this tag should be reopened automatically
            if ($keepReopening)
            {
                if ($openTag->getFlags() & self::RULE_AUTO_REOPEN)
                {
                    $reopenTags[] = $openTag;
                }
                else
                {
                    // Stop at the first tag that is not reopened. As a result, $reopenTags
                    // always holds the first elements of $closeTags under the same indexes
                    $keepReopening = false;
                }
            }

            // Find the earliest position we can close this open tag
            $tagPos = $tag->getPos();
            if ($ignoreWhitespace)
            {
                $tagPos = $this->getMagicEndPos($tagPos);
            }

            // Output an end tag to close this start tag, then update the context
            $endTag = new Tag(Tag::END_TAG, $openTagName, $tagPos, 0);
            $endTag->setFlags($openTag->getFlags());
            $this->outputTag($endTag);
            $this->popContext();
        }

        // Output our tag, moving the cursor past it, then update the context
        $this->outputTag($tag);
        $this->popContext();

        // If our fixing budget allows it, peek at upcoming tags and remove end tags that would
        // close tags that are already being closed now. Also, filter our list of tags being
        // reopened by removing those that would immediately be closed
        if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
        {
            /**
            * @var integer Rightmost position of the portion of text to ignore
            */
            $ignorePos = $this->pos;

            // Walk the stack from its end, which holds the next tags to be processed. Every
            // iteration is charged to the fixing budget by the loop condition itself
            $i = count($this->tagStack);
            while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
            {
                $upcomingTag = $this->tagStack[$i];

                // Test whether the upcoming tag is positioned at current "ignore" position and it's
                // strictly an end tag (not a start tag or a self-closing tag)
                if ($upcomingTag->getPos() > $ignorePos
                 || $upcomingTag->isStartTag())
                {
                    break;
                }

                // Test whether this tag would close any of the tags we're about to reopen
                $j = count($closeTags);

                // Every comparison is charged to the fixing budget as well
                while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
                {
                    if ($upcomingTag->canClose($closeTags[$j]))
                    {
                        // Remove the tag from the lists and reset the keys
                        array_splice($closeTags, $j, 1);

                        // Both lists share their indexes, so the same offset is removed from
                        // the list of tags to reopen if it exists there
                        if (isset($reopenTags[$j]))
                        {
                            array_splice($reopenTags, $j, 1);
                        }

                        // Extend the ignored text to cover this tag
                        $ignorePos = max(
                            $ignorePos,
                            $upcomingTag->getPos() + $upcomingTag->getLen()
                        );

                        break;
                    }
                }
            }

            // Ignore the text covered by the redundant end tags, if any
            if ($ignorePos > $this->pos)
            {
                /**
                * @todo have a method that takes (pos,len) rather than a Tag
                */
                $this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
            }
        }

        // Re-add tags that need to be reopened, at current cursor position
        foreach ($reopenTags as $startTag)
        {
            $newTag = $this->addCopyTag($startTag, $this->pos, 0);

            // Re-pair the new tag
            $endTag = $startTag->getEndTag();
            if ($endTag)
            {
                $newTag->pairWith($endTag);
            }
        }
    }
1699  
1700      /**
1701      * Update counters and replace current context with its parent context
1702      *
1703      * @return void
1704      */
1705  	protected function popContext()
1706      {
1707          $tag = array_pop($this->openTags);
1708          --$this->cntOpen[$tag->getName()];
1709          $this->context = $this->context['parentContext'];
1710      }
1711  
1712      /**
1713      * Update counters and replace current context with a new context based on given tag
1714      *
1715      * If given tag is a self-closing tag, the context won't change
1716      *
1717      * @param  Tag  $tag Start tag (including self-closing)
1718      * @return void
1719      */
1720  	protected function pushContext(Tag $tag)
1721      {
1722          $tagName   = $tag->getName();
1723          $tagFlags  = $tag->getFlags();
1724          $tagConfig = $this->tagsConfig[$tagName];
1725  
1726          ++$this->cntTotal[$tagName];
1727  
1728          // If this is a self-closing tag, the context remains the same
1729          if ($tag->isSelfClosingTag())
1730          {
1731              return;
1732          }
1733  
1734          // Recompute the allowed tags
1735          $allowed = [];
1736          foreach ($this->context['allowed'] as $k => $v)
1737          {
1738              // If the current tag is not transparent, override the low bits (allowed children) of
1739              // current context with its high bits (allowed descendants)
1740              if (!($tagFlags & self::RULE_IS_TRANSPARENT))
1741              {
1742                  $v = ($v & 0xFF00) | ($v >> 8);
1743              }
1744              $allowed[] = $tagConfig['allowed'][$k] & $v;
1745          }
1746  
1747          // Use this tag's flags as a base for this context and add inherited rules
1748          $flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);
1749  
1750          // RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
1751          if ($flags & self::RULE_DISABLE_AUTO_BR)
1752          {
1753              $flags &= ~self::RULE_ENABLE_AUTO_BR;
1754          }
1755  
1756          ++$this->cntOpen[$tagName];
1757          $this->openTags[] = $tag;
1758          $this->context = [
1759              'allowed'       => $allowed,
1760              'flags'         => $flags,
1761              'inParagraph'   => false,
1762              'parentContext' => $this->context
1763          ];
1764      }
1765  
1766      /**
1767      * Return whether given tag is allowed in current context
1768      *
1769      * @param  string $tagName
1770      * @return bool
1771      */
1772  	protected function tagIsAllowed($tagName)
1773      {
1774          $n = $this->tagsConfig[$tagName]['bitNumber'];
1775  
1776          return (bool) ($this->context['allowed'][$n >> 3] & (1 << ($n & 7)));
1777      }
1778  
1779      //==========================================================================
1780      // Tag stack
1781      //==========================================================================
1782  
1783      /**
1784      * Add a start tag
1785      *
1786      * @param  string  $name Name of the tag
1787      * @param  integer $pos  Position of the tag in the text
1788      * @param  integer $len  Length of text consumed by the tag
1789      * @param  integer $prio Tag's priority
1790      * @return Tag
1791      */
1792  	public function addStartTag($name, $pos, $len, $prio = 0)
1793      {
1794          return $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);
1795      }
1796  
1797      /**
1798      * Add an end tag
1799      *
1800      * @param  string  $name Name of the tag
1801      * @param  integer $pos  Position of the tag in the text
1802      * @param  integer $len  Length of text consumed by the tag
1803      * @param  integer $prio Tag's priority
1804      * @return Tag
1805      */
1806  	public function addEndTag($name, $pos, $len, $prio = 0)
1807      {
1808          return $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);
1809      }
1810  
1811      /**
1812      * Add a self-closing tag
1813      *
1814      * @param  string  $name Name of the tag
1815      * @param  integer $pos  Position of the tag in the text
1816      * @param  integer $len  Length of text consumed by the tag
1817      * @param  integer $prio Tag's priority
1818      * @return Tag
1819      */
1820  	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
1821      {
1822          return $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);
1823      }
1824  
1825      /**
1826      * Add a 0-width "br" tag to force a line break at given position
1827      *
1828      * @param  integer $pos  Position of the tag in the text
1829      * @param  integer $prio Tag's priority
1830      * @return Tag
1831      */
1832  	public function addBrTag($pos, $prio = 0)
1833      {
1834          return $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);
1835      }
1836  
1837      /**
1838      * Add an "ignore" tag
1839      *
1840      * @param  integer $pos  Position of the tag in the text
1841      * @param  integer $len  Length of text consumed by the tag
1842      * @param  integer $prio Tag's priority
1843      * @return Tag
1844      */
1845  	public function addIgnoreTag($pos, $len, $prio = 0)
1846      {
1847          return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, min($len, $this->textLen - $pos), $prio);
1848      }
1849  
1850      /**
1851      * Add a paragraph break at given position
1852      *
1853      * Uses a zero-width tag that is actually never output in the result
1854      *
1855      * @param  integer $pos  Position of the tag in the text
1856      * @param  integer $prio Tag's priority
1857      * @return Tag
1858      */
1859  	public function addParagraphBreak($pos, $prio = 0)
1860      {
1861          return $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);
1862      }
1863  
1864      /**
1865      * Add a copy of given tag at given position and length
1866      *
1867      * @param  Tag     $tag  Original tag
1868      * @param  integer $pos  Copy's position
1869      * @param  integer $len  Copy's length
1870      * @param  integer $prio Copy's priority (same as original by default)
1871      * @return Tag           Copy tag
1872      */
1873  	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
1874      {
1875          if (!isset($prio))
1876          {
1877              $prio = $tag->getSortPriority();
1878          }
1879          $copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $prio);
1880          $copy->setAttributes($tag->getAttributes());
1881  
1882          return $copy;
1883      }
1884  
1885      /**
1886      * Add a tag
1887      *
1888      * @param  integer $type Tag's type
1889      * @param  string  $name Name of the tag
1890      * @param  integer $pos  Position of the tag in the text
1891      * @param  integer $len  Length of text consumed by the tag
1892      * @param  integer $prio Tag's priority
1893      * @return Tag
1894      */
1895  	protected function addTag($type, $name, $pos, $len, $prio)
1896      {
1897          // Create the tag
1898          $tag = new Tag($type, $name, $pos, $len, $prio);
1899  
1900          // Set this tag's rules bitfield
1901          if (isset($this->tagsConfig[$name]))
1902          {
1903              $tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
1904          }
1905  
1906          // Invalidate this tag if it's an unknown tag, a disabled tag, if either of its length or
1907          // position is negative or if it's out of bounds
1908          if ((!isset($this->tagsConfig[$name]) && !$tag->isSystemTag())
1909           || $this->isInvalidTextSpan($pos, $len))
1910          {
1911              $tag->invalidate();
1912          }
1913          elseif (!empty($this->tagsConfig[$name]['isDisabled']))
1914          {
1915              $this->logger->warn(
1916                  'Tag is disabled',
1917                  [
1918                      'tag'     => $tag,
1919                      'tagName' => $name
1920                  ]
1921              );
1922              $tag->invalidate();
1923          }
1924          else
1925          {
1926              $this->insertTag($tag);
1927          }
1928  
1929          return $tag;
1930      }
1931  
1932      /**
1933      * Test whether given text span is outside text boundaries or an invalid UTF sequence
1934      *
1935      * @param  integer $pos Start of text
1936      * @param  integer $len Length of text
1937      * @return bool
1938      */
1939  	protected function isInvalidTextSpan($pos, $len)
1940      {
1941          return ($len < 0 || $pos < 0 || $pos + $len > $this->textLen || preg_match('([\\x80-\\xBF])', substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1)));
1942      }
1943  
1944      /**
1945      * Insert given tag in the tag stack
1946      *
1947      * @param  Tag  $tag
1948      * @return void
1949      */
1950  	protected function insertTag(Tag $tag)
1951      {
1952          if (!$this->tagStackIsSorted)
1953          {
1954              $this->tagStack[] = $tag;
1955          }
1956          else
1957          {
1958              // Scan the stack and copy every tag to the next slot until we find the correct index
1959              $i   = count($this->tagStack);
1960              $key = $this->getSortKey($tag);
1961              while ($i > 0 && $key > $this->getSortKey($this->tagStack[$i - 1]))
1962              {
1963                  $this->tagStack[$i] = $this->tagStack[$i - 1];
1964                  --$i;
1965              }
1966              $this->tagStack[$i] = $tag;
1967          }
1968      }
1969  
1970      /**
1971      * Add a pair of tags
1972      *
1973      * @param  string  $name     Name of the tags
1974      * @param  integer $startPos Position of the start tag
1975      * @param  integer $startLen Length of the start tag
1976      * @param  integer $endPos   Position of the start tag
1977      * @param  integer $endLen   Length of the start tag
1978      * @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
1979      * @return Tag               Start tag
1980      */
1981  	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
1982      {
1983          // NOTE: the end tag is added first to try to keep the stack in the correct order
1984          $endTag   = $this->addEndTag($name, $endPos, $endLen, -$prio);
1985          $startTag = $this->addStartTag($name, $startPos, $startLen, $prio);
1986          $startTag->pairWith($endTag);
1987  
1988          return $startTag;
1989      }
1990  
1991      /**
1992      * Add a tag that represents a verbatim copy of the original text
1993      *
1994      * @param  integer $pos  Position of the tag in the text
1995      * @param  integer $len  Length of text consumed by the tag
1996      * @param  integer $prio Tag's priority
1997      * @return Tag
1998      */
1999  	public function addVerbatim($pos, $len, $prio = 0)
2000      {
2001          return $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);
2002      }
2003  
2004      /**
2005      * Sort tags by position and precedence
2006      *
2007      * @return void
2008      */
2009  	protected function sortTags()
2010      {
2011          $arr = [];
2012          foreach ($this->tagStack as $i => $tag)
2013          {
2014              $key       = $this->getSortKey($tag, $i);
2015              $arr[$key] = $tag;
2016          }
2017          krsort($arr);
2018  
2019          $this->tagStack         = array_values($arr);
2020          $this->tagStackIsSorted = true;
2021      }
2022  
2023      /**
2024      * Generate a key for given tag that can be used to compare its position using lexical comparisons
2025      *
2026      * Tags are sorted by position first, then by priority, then by whether they consume any text,
2027      * then by length, and finally in order of their creation.
2028      *
2029      * The stack's array is in reverse order. Therefore, tags that appear at the start of the text
2030      * are at the end of the array.
2031      *
2032      * @param  Tag     $tag
2033      * @param  integer $tagIndex
2034      * @return string
2035      */
2036  	protected function getSortKey(Tag $tag, int $tagIndex = 0): string
2037      {
2038          // Ensure that negative values are sorted correctly by flagging them and making them positive
2039          $prioFlag = ($tag->getSortPriority() >= 0);
2040          $prio     = $tag->getSortPriority();
2041          if (!$prioFlag)
2042          {
2043              $prio += (1 << 30);
2044          }
2045  
2046          // Sort 0-width tags separately from the rest
2047          $lenFlag = ($tag->getLen() > 0);
2048          if ($lenFlag)
2049          {
2050              // Inverse their length so that longest matches are processed first
2051              $lenOrder = $this->textLen - $tag->getLen();
2052          }
2053          else
2054          {
2055              // Sort self-closing tags in-between start tags and end tags to keep them outside of tag
2056              // pairs
2057              $order = [
2058                  Tag::END_TAG          => 0,
2059                  Tag::SELF_CLOSING_TAG => 1,
2060                  Tag::START_TAG        => 2
2061              ];
2062              $lenOrder = $order[$tag->getType()];
2063          }
2064  
2065          return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $prioFlag, $prio, $lenFlag, $lenOrder, $tagIndex);
2066      }
2067  }


Generated: Sun Feb 14 20:08:31 2021 Cross-referenced by PHPXref 0.7.1