[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
/**#@+
 * Boolean rules bitfield
 */
/** @const */ var RULE_AUTO_CLOSE        = 1 << 0;
/** @const */ var RULE_AUTO_REOPEN       = 1 << 1;
/** @const */ var RULE_BREAK_PARAGRAPH   = 1 << 2;
/** @const */ var RULE_CREATE_PARAGRAPHS = 1 << 3;
/** @const */ var RULE_DISABLE_AUTO_BR   = 1 << 4;
/** @const */ var RULE_ENABLE_AUTO_BR    = 1 << 5;
/** @const */ var RULE_IGNORE_TAGS       = 1 << 6;
/** @const */ var RULE_IGNORE_TEXT       = 1 << 7;
/** @const */ var RULE_IGNORE_WHITESPACE = 1 << 8;
/** @const */ var RULE_IS_TRANSPARENT    = 1 << 9;
/** @const */ var RULE_PREVENT_BR        = 1 << 10;
/** @const */ var RULE_SUSPEND_AUTO_BR   = 1 << 11;
/** @const */ var RULE_TRIM_FIRST_LINE   = 1 << 12;
/**#@-*/

/**
 * @const Mask covering every rule that deals with automatic line breaks
 */
var RULES_AUTO_LINEBREAKS = RULE_DISABLE_AUTO_BR | RULE_ENABLE_AUTO_BR | RULE_SUSPEND_AUTO_BR;

/**
 * @const Mask covering the rules that subcontexts inherit
 */
var RULES_INHERITANCE = RULE_ENABLE_AUTO_BR;

/**
 * @const Every character treated as whitespace by the parser
 */
var WHITESPACE = " \n\t";

/**
 * @type {!Object.<string,number>} Per tag name, how many tags are currently open
 */
var cntOpen;

/**
 * @type {!Object.<string,number>} Per tag name, how many times the tag has been used
 */
var cntTotal;

/**
 * @type {!Object} Context the parser is currently in
 */
var context;

/**
 * @type {number} Effort spent so far on repairing bad markup
 */
var currentFixingCost;

/**
 * @type {?Tag} Tag currently being processed
 */
var currentTag;

/**
 * @type {boolean} Whether the output contains "rich" tags, i.e. any tag other than <p> or <br/>
 */
var isRich;

/**
 * @type {!Logger} Logger owned by this parser
 */
var logger = new Logger;

/**
 * @type {number} Upper bound for the effort spent on repairing bad markup
 */
var maxFixingCost = 10000;

/**
 * @type {!Object} Namespace prefixes in use in the document (the prefixes are the keys)
 */
var namespaces;

/**
 * @type {!Array.<!Tag>} Stack of open tags (instances of Tag)
 */
var openTags;

/**
 * @type {string} Output produced by this parser
 */
var output;

/**
 * @type {!Object.<!Object>}
 */
var plugins;

/**
 * @type {number} Cursor position in the original text
 */
var pos;

/**
 * @type {!Object} Variables registered for use in filters
 */
var registeredVars;

/**
 * @type {!Object} Context used at the root of the document
 */
var rootContext;

/**
 * @type {!Object} Tags' config
 * @const
 */
var tagsConfig;

/**
 * @type {!Array.<!Tag>} Tag storage
 */
var tagStack;

/**
 * @type {boolean} Whether the tags in the stack are sorted
 */
var tagStackIsSorted;

/**
 * @type {string} Text being parsed
 */
var text;

/**
 * @type {number} Length of the text being parsed
 */
var textLen;

/**
 * @type {number} Counter incremented every time the parser is reset. Used as a canary to
 *                detect whether the parser was reset during execution
 */
var uid = 0;

/**
 * @type {number} Position before which text is output verbatim, without paragraphs or linebreaks
 */
var wsPos;

//==========================================================================
// Public API
//==========================================================================

/**
 * Disable a tag
 *
 * Unknown tag names are ignored.
 *
 * @param {string} tagName Name of the tag
 */
function disableTag(tagName)
{
    if (!tagsConfig[tagName])
    {
        return;
    }

    // Work on a private copy so that tags sharing the same config object are unaffected
    var ownConfig = copyTagConfig(tagName);
    ownConfig.isDisabled = true;
}

/**
 * Enable a tag
 *
 * Unknown tag names are ignored.
 *
 * @param {string} tagName Name of the tag
 */
function enableTag(tagName)
{
    if (!tagsConfig[tagName])
    {
        return;
    }

    var ownConfig = copyTagConfig(tagName);
    ownConfig.isDisabled = false;
}

/**
 * Return the Logger instance used by this parser
 *
 * @return {!Logger}
 */
function getLogger()
{
    return logger;
}

/**
 * Parse a text
 *
 * Throws the string 'The parser has been reset during execution' if reset() was called by a
 * plugin or a filter while the text was being parsed.
 *
 * @param  {string} _text Text to parse
 * @return {string}       XML representation
 */
function parse(_text)
{
    // Start from a clean state and remember which run this is
    reset(_text);
    var runUid = uid;

    // Collect the tags, process them, then wrap up the document
    executePluginParsers();
    processTags();
    finalizeOutput();

    // A different uid means reset() ran again before we got here
    if (runUid !== uid)
    {
        throw 'The parser has been reset during execution';
    }

    if (currentFixingCost > maxFixingCost)
    {
        logger.warn('Fixing cost limit exceeded');
    }

    return output;
}

/**
 * Reset the parser for a new parsing
 *
 * @param {string} _text Text to be parsed
 */
function reset(_text)
{
    // Normalize CR/CRLF to LF, then drop the control characters that aren't allowed in XML
    var normalized = _text
        .replace(/\r\n?/g, "\n")
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]+/g, '');

    logger.clear();

    // Input and cursors
    text     = normalized;
    textLen  = text.length;
    pos      = 0;
    wsPos    = 0;

    // Output state
    output   = '';
    isRich   = false;
    namespaces = {};

    // Tag bookkeeping
    cntOpen  = {};
    cntTotal = {};
    openTags = [];
    tagStack = [];
    tagStackIsSorted  = false;
    currentTag        = null;
    currentFixingCost = 0;

    // The root context object is reused as-is; only its paragraph state is cleared
    context = rootContext;
    context.inParagraph = false;

    // Bump the UID so that a run in progress can detect this reset
    ++uid;
}

/**
 * Change a tag's tagLimit
 *
 * NOTE: the default tagLimit should generally be set during configuration instead
 *
 * @param {string} tagName  The tag's name, in UPPERCASE
 * @param {number} tagLimit
 */
function setTagLimit(tagName, tagLimit)
{
    if (!tagsConfig[tagName])
    {
        return;
    }

    var ownConfig = copyTagConfig(tagName);
    ownConfig.tagLimit = tagLimit;
}

/**
 * Change a tag's nestingLimit
 *
 * NOTE: the default nestingLimit should generally be set during configuration instead
 *
 * @param {string} tagName      The tag's name, in UPPERCASE
 * @param {number} nestingLimit
 */
function setNestingLimit(tagName, nestingLimit)
{
    if (!tagsConfig[tagName])
    {
        return;
    }

    var ownConfig = copyTagConfig(tagName);
    ownConfig.nestingLimit = nestingLimit;
}

/**
 * Replace a tag's config with a shallow copy of itself
 *
 * Guarantees that the tag's config is its own object rather than one shared with another,
 * identical tag, so that it can be modified safely.
 *
 * @param  {string}  tagName Tag's name
 * @return {!Object}         The copy, which is now stored in tagsConfig
 */
function copyTagConfig(tagName)
{
    var original = tagsConfig[tagName],
        clone    = {};

    for (var key in original)
    {
        clone[key] = original[key];
    }
    tagsConfig[tagName] = clone;

    return clone;
}

//==========================================================================
// Output handling
//========================================================================== 313 314 /** 315 * Replace Unicode characters outside the BMP with XML entities in the output 316 */ 317 function encodeUnicodeSupplementaryCharacters() 318 { 319 output = output.replace( 320 /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, 321 encodeUnicodeSupplementaryCharactersCallback 322 ); 323 } 324 325 /** 326 * Encode given surrogate pair into an XML entity 327 * 328 * @param {string} pair Surrogate pair 329 * @return {string} XML entity 330 */ 331 function encodeUnicodeSupplementaryCharactersCallback(pair) 332 { 333 var cp = (pair.charCodeAt(0) << 10) + pair.charCodeAt(1) - 56613888; 334 335 return '&#' + cp + ';'; 336 } 337 338 /** 339 * Finalize the output by appending the rest of the unprocessed text and create the root node 340 */ 341 function finalizeOutput() 342 { 343 var tmp; 344 345 // Output the rest of the text and close the last paragraph 346 outputText(textLen, 0, true); 347 348 // Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs 349 do 350 { 351 tmp = output; 352 output = output.replace(/<([^ />]+)[^>]*><\/\1>/g, ''); 353 } 354 while (output !== tmp); 355 356 // Merge consecutive <i> tags 357 output = output.replace(/<\/i><i>/g, ''); 358 359 // Remove control characters from the output to ensure it's valid XML 360 output = output.replace(/[\x00-\x08\x0B-\x1F]/g, ''); 361 362 // Encode Unicode characters that are outside of the BMP 363 encodeUnicodeSupplementaryCharacters(); 364 365 // Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>) 366 var tagName = (isRich) ? 
'r' : 't'; 367 368 // Prepare the root node with all the namespace declarations 369 tmp = '<' + tagName; 370 if (HINT.namespaces) 371 { 372 for (var prefix in namespaces) 373 { 374 tmp += ' xmlns:' + prefix + '="urn:s9e:TextFormatter:' + prefix + '"'; 375 } 376 } 377 378 output = tmp + '>' + output + '</' + tagName + '>'; 379 } 380 381 /** 382 * Append a tag to the output 383 * 384 * @param {!Tag} tag Tag to append 385 */ 386 function outputTag(tag) 387 { 388 isRich = true; 389 390 var tagName = tag.getName(), 391 tagPos = tag.getPos(), 392 tagLen = tag.getLen(), 393 tagFlags = tag.getFlags(), 394 skipBefore = 0, 395 skipAfter = 0; 396 397 if (HINT.RULE_IGNORE_WHITESPACE && (tagFlags & RULE_IGNORE_WHITESPACE)) 398 { 399 skipBefore = 1; 400 skipAfter = (tag.isEndTag()) ? 2 : 1; 401 } 402 403 // Current paragraph must end before the tag if: 404 // - the tag is a start (or self-closing) tag and it breaks paragraphs, or 405 // - the tag is an end tag (but not self-closing) 406 var closeParagraph = !!(!tag.isStartTag() || (HINT.RULE_BREAK_PARAGRAPH && (tagFlags & RULE_BREAK_PARAGRAPH))); 407 408 // Let the cursor catch up with this tag's position 409 outputText(tagPos, skipBefore, closeParagraph); 410 411 // Capture the text consumed by the tag 412 var tagText = (tagLen) 413 ? htmlspecialchars_noquotes(text.substring(tagPos, tagPos + tagLen)) 414 : ''; 415 416 // Output current tag 417 if (tag.isStartTag()) 418 { 419 // Handle paragraphs before opening the tag 420 if (!HINT.RULE_BREAK_PARAGRAPH || !(tagFlags & RULE_BREAK_PARAGRAPH)) 421 { 422 outputParagraphStart(tagPos); 423 } 424 425 // Record this tag's namespace, if applicable 426 if (HINT.namespaces) 427 { 428 var colonPos = tagName.indexOf(':'); 429 if (colonPos > 0) 430 { 431 namespaces[tagName.substring(0, colonPos)] = 0; 432 } 433 } 434 435 // Open the start tag and add its attributes, but don't close the tag 436 output += '<' + tagName; 437 438 // We output the attributes in lexical order. 
Helps canonicalizing the output and could 439 // prove useful someday 440 var attributes = tag.getAttributes(), 441 attributeNames = []; 442 for (var attrName in attributes) 443 { 444 attributeNames.push(attrName); 445 } 446 attributeNames.sort( 447 function(a, b) 448 { 449 return (a > b) ? 1 : -1; 450 } 451 ); 452 attributeNames.forEach( 453 function(attrName) 454 { 455 output += ' ' + attrName + '="' + htmlspecialchars_compat(attributes[attrName].toString()).replace(/\n/g, ' ') + '"'; 456 } 457 ); 458 459 if (tag.isSelfClosingTag()) 460 { 461 if (tagLen) 462 { 463 output += '>' + tagText + '</' + tagName + '>'; 464 } 465 else 466 { 467 output += '/>'; 468 } 469 } 470 else if (tagLen) 471 { 472 output += '><s>' + tagText + '</s>'; 473 } 474 else 475 { 476 output += '>'; 477 } 478 } 479 else 480 { 481 if (tagLen) 482 { 483 output += '<e>' + tagText + '</e>'; 484 } 485 486 output += '</' + tagName + '>'; 487 } 488 489 // Move the cursor past the tag 490 pos = tagPos + tagLen; 491 492 // Skip newlines (no other whitespace) after this tag 493 wsPos = pos; 494 while (skipAfter && wsPos < textLen && text[wsPos] === "\n") 495 { 496 // Decrement the number of lines to skip 497 --skipAfter; 498 499 // Move the cursor past the newline 500 ++wsPos; 501 } 502 } 503 504 /** 505 * Output the text between the cursor's position (included) and given position (not included) 506 * 507 * @param {number} catchupPos Position we're catching up to 508 * @param {number} maxLines Maximum number of lines to ignore at the end of the text 509 * @param {boolean} closeParagraph Whether to close the paragraph at the end, if applicable 510 */ 511 function outputText(catchupPos, maxLines, closeParagraph) 512 { 513 if (closeParagraph) 514 { 515 if (!(context.flags & RULE_CREATE_PARAGRAPHS)) 516 { 517 closeParagraph = false; 518 } 519 else 520 { 521 // Ignore any number of lines at the end if we're closing a paragraph 522 maxLines = -1; 523 } 524 } 525 526 if (pos >= catchupPos) 527 { 528 // We're 
already there, close the paragraph if applicable and return 529 if (closeParagraph) 530 { 531 outputParagraphEnd(); 532 } 533 } 534 535 // Skip over previously identified whitespace if applicable 536 if (wsPos > pos) 537 { 538 var skipPos = Math.min(catchupPos, wsPos); 539 output += text.substring(pos, skipPos); 540 pos = skipPos; 541 542 if (pos >= catchupPos) 543 { 544 // Skipped everything. Close the paragraph if applicable and return 545 if (closeParagraph) 546 { 547 outputParagraphEnd(); 548 } 549 } 550 } 551 552 var catchupText; 553 554 // Test whether we're even supposed to output anything 555 if (HINT.RULE_IGNORE_TEXT && context.flags & RULE_IGNORE_TEXT) 556 { 557 catchupText = text.substring(pos, catchupPos); 558 559 // If the catchup text is not entirely composed of whitespace, we put it inside ignore tags 560 if (!/^[ \n\t]*$/.test(catchupText)) 561 { 562 catchupText = '<i>' + htmlspecialchars_noquotes(catchupText) + '</i>'; 563 } 564 565 output += catchupText; 566 pos = catchupPos; 567 568 if (closeParagraph) 569 { 570 outputParagraphEnd(); 571 } 572 573 return; 574 } 575 576 // Compute the amount of text to ignore at the end of the output 577 var ignorePos = catchupPos, 578 ignoreLen = 0; 579 580 // Ignore as many lines (including whitespace) as specified 581 while (maxLines && --ignorePos >= pos) 582 { 583 var c = text[ignorePos]; 584 if (c !== ' ' && c !== "\n" && c !== "\t") 585 { 586 break; 587 } 588 589 if (c === "\n") 590 { 591 --maxLines; 592 } 593 594 ++ignoreLen; 595 } 596 597 // Adjust catchupPos to ignore the text at the end 598 catchupPos -= ignoreLen; 599 600 // Break down the text in paragraphs if applicable 601 if (HINT.RULE_CREATE_PARAGRAPHS && context.flags & RULE_CREATE_PARAGRAPHS) 602 { 603 if (!context.inParagraph) 604 { 605 outputWhitespace(catchupPos); 606 607 if (catchupPos > pos) 608 { 609 outputParagraphStart(catchupPos); 610 } 611 } 612 613 // Look for a paragraph break in this text 614 var pbPos = text.indexOf("\n\n", pos); 
615 616 while (pbPos > -1 && pbPos < catchupPos) 617 { 618 outputText(pbPos, 0, true); 619 outputParagraphStart(catchupPos); 620 621 pbPos = text.indexOf("\n\n", pos); 622 } 623 } 624 625 // Capture, escape and output the text 626 if (catchupPos > pos) 627 { 628 catchupText = htmlspecialchars_noquotes( 629 text.substring(pos, catchupPos) 630 ); 631 632 // Format line breaks if applicable 633 if (HINT.RULE_ENABLE_AUTO_BR && (context.flags & RULES_AUTO_LINEBREAKS) === RULE_ENABLE_AUTO_BR) 634 { 635 catchupText = catchupText.replace(/\n/g, "<br/>\n"); 636 } 637 638 output += catchupText; 639 } 640 641 // Close the paragraph if applicable 642 if (closeParagraph) 643 { 644 outputParagraphEnd(); 645 } 646 647 // Add the ignored text if applicable 648 if (ignoreLen) 649 { 650 output += text.substring(catchupPos, catchupPos + ignoreLen); 651 } 652 653 // Move the cursor past the text 654 pos = catchupPos + ignoreLen; 655 } 656 657 /** 658 * Output a linebreak tag 659 * 660 * @param {!Tag} tag 661 */ 662 function outputBrTag(tag) 663 { 664 outputText(tag.getPos(), 0, false); 665 output += '<br/>'; 666 } 667 668 /** 669 * Output an ignore tag 670 * 671 * @param {!Tag} tag 672 */ 673 function outputIgnoreTag(tag) 674 { 675 var tagPos = tag.getPos(), 676 tagLen = tag.getLen(); 677 678 // Capture the text to ignore 679 var ignoreText = text.substring(tagPos, tagPos + tagLen); 680 681 // Catch up with the tag's position then output the tag 682 outputText(tagPos, 0, false); 683 output += '<i>' + htmlspecialchars_noquotes(ignoreText) + '</i>'; 684 isRich = true; 685 686 // Move the cursor past this tag 687 pos = tagPos + tagLen; 688 } 689 690 /** 691 * Start a paragraph between current position and given position, if applicable 692 * 693 * @param {number} maxPos Rightmost position at which the paragraph can be opened 694 */ 695 function outputParagraphStart(maxPos) 696 { 697 if (!HINT.RULE_CREATE_PARAGRAPHS) 698 { 699 return; 700 } 701 702 // Do nothing if we're already in a 
paragraph, or if we don't use paragraphs 703 if (context.inParagraph 704 || !(context.flags & RULE_CREATE_PARAGRAPHS)) 705 { 706 return; 707 } 708 709 // Output the whitespace between pos and maxPos if applicable 710 outputWhitespace(maxPos); 711 712 // Open the paragraph, but only if it's not at the very end of the text 713 if (pos < textLen) 714 { 715 output += '<p>'; 716 context.inParagraph = true; 717 } 718 } 719 720 /** 721 * Close current paragraph at current position if applicable 722 */ 723 function outputParagraphEnd() 724 { 725 // Do nothing if we're not in a paragraph 726 if (!context.inParagraph) 727 { 728 return; 729 } 730 731 output += '</p>'; 732 context.inParagraph = false; 733 } 734 735 /** 736 * Output the content of a verbatim tag 737 * 738 * @param {!Tag} tag 739 */ 740 function outputVerbatim(tag) 741 { 742 var flags = context.flags; 743 context.flags = tag.getFlags(); 744 outputText(currentTag.getPos() + currentTag.getLen(), 0, false); 745 context.flags = flags; 746 } 747 748 /** 749 * Skip as much whitespace after current position as possible 750 * 751 * @param {number} maxPos Rightmost character to be skipped 752 */ 753 function outputWhitespace(maxPos) 754 { 755 while (pos < maxPos && " \n\t".indexOf(text[pos]) > -1) 756 { 757 output += text[pos]; 758 ++pos; 759 } 760 } 761 762 //========================================================================== 763 // Plugins handling 764 //========================================================================== 765 766 /** 767 * Disable a plugin 768 * 769 * @param {string} pluginName Name of the plugin 770 */ 771 function disablePlugin(pluginName) 772 { 773 if (plugins[pluginName]) 774 { 775 plugins[pluginName].isDisabled = true; 776 } 777 } 778 779 /** 780 * Enable a plugin 781 * 782 * @param {string} pluginName Name of the plugin 783 */ 784 function enablePlugin(pluginName) 785 { 786 if (plugins[pluginName]) 787 { 788 plugins[pluginName].isDisabled = false; 789 } 790 } 791 792 /** 793 * 
Execute given plugin 794 * 795 * @param {string} pluginName Plugin's name 796 */ 797 function executePluginParser(pluginName) 798 { 799 var pluginConfig = plugins[pluginName]; 800 if (pluginConfig.quickMatch && text.indexOf(pluginConfig.quickMatch) < 0) 801 { 802 return; 803 } 804 805 var matches = []; 806 if (HINT.regexp && HINT.regexpLimit && typeof pluginConfig.regexp !== 'undefined' && typeof pluginConfig.regexpLimit !== 'undefined') 807 { 808 matches = getMatches(pluginConfig.regexp, pluginConfig.regexpLimit); 809 if (!matches.length) 810 { 811 return; 812 } 813 } 814 815 // Execute the plugin's parser, which will add tags via addStartTag() and others 816 getPluginParser(pluginName)(text, matches); 817 } 818 819 /** 820 * Execute all the plugins 821 */ 822 function executePluginParsers() 823 { 824 for (var pluginName in plugins) 825 { 826 if (!plugins[pluginName].isDisabled) 827 { 828 executePluginParser(pluginName); 829 } 830 } 831 } 832 833 /** 834 * Get regexp matches in a manner similar to preg_match_all() with PREG_SET_ORDER | PREG_OFFSET_CAPTURE 835 * 836 * @param {!RegExp} regexp 837 * @param {number} limit 838 * @return {!Array.<!Array>} 839 */ 840 function getMatches(regexp, limit) 841 { 842 // Reset the regexp 843 regexp.lastIndex = 0; 844 var matches = [], cnt = 0, m; 845 while (++cnt <= limit && (m = regexp.exec(text))) 846 { 847 // NOTE: coercing m.index to a number because Closure Compiler thinks pos is a string otherwise 848 var pos = m.index, 849 match = [[m[0], pos]], 850 i = 0; 851 while (++i < m.length) 852 { 853 var str = m[i]; 854 855 // Sub-expressions that were not evaluated return undefined 856 if (str === undefined) 857 { 858 match.push(['', -1]); 859 } 860 else 861 { 862 match.push([str, text.indexOf(str, pos)]); 863 pos += str.length; 864 } 865 } 866 867 matches.push(match); 868 } 869 870 return matches; 871 } 872 873 /** 874 * Get the callback for given plugin's parser 875 * 876 * @param {string} pluginName 877 * @return 
{function(string, !Array)} 878 */ 879 function getPluginParser(pluginName) 880 { 881 return plugins[pluginName].parser; 882 } 883 884 /** 885 * Register a parser 886 * 887 * Can be used to add a new parser with no plugin config, or pre-generate a parser for an 888 * existing plugin 889 * 890 * @param {string} pluginName 891 * @param {!Function} parser 892 * @param {?RegExp=} regexp 893 * @param {number=} limit 894 */ 895 function registerParser(pluginName, parser, regexp, limit) 896 { 897 // Create an empty config for this plugin to ensure it is executed 898 if (!plugins[pluginName]) 899 { 900 plugins[pluginName] = {}; 901 } 902 if (regexp) 903 { 904 plugins[pluginName].regexp = regexp; 905 plugins[pluginName].limit = limit || Infinity; 906 } 907 plugins[pluginName].parser = parser; 908 } 909 910 //========================================================================== 911 // Rules handling 912 //========================================================================== 913 914 /** 915 * Apply closeAncestor rules associated with given tag 916 * 917 * @param {!Tag} tag Tag 918 * @return {boolean} Whether a new tag has been added 919 */ 920 function closeAncestor(tag) 921 { 922 if (!HINT.closeAncestor) 923 { 924 return false; 925 } 926 927 if (openTags.length) 928 { 929 var tagName = tag.getName(), 930 tagConfig = tagsConfig[tagName]; 931 932 if (tagConfig.rules.closeAncestor) 933 { 934 var i = openTags.length; 935 936 while (--i >= 0) 937 { 938 var ancestor = openTags[i], 939 ancestorName = ancestor.getName(); 940 941 if (tagConfig.rules.closeAncestor[ancestorName]) 942 { 943 ++currentFixingCost; 944 945 // We have to close this ancestor. First we reinsert this tag... 
946 tagStack.push(tag); 947 948 // ...then we add a new end tag for it with a better priority 949 addMagicEndTag(ancestor, tag.getPos(), tag.getSortPriority() - 1); 950 951 return true; 952 } 953 } 954 } 955 } 956 957 return false; 958 } 959 960 /** 961 * Apply closeParent rules associated with given tag 962 * 963 * @param {!Tag} tag Tag 964 * @return {boolean} Whether a new tag has been added 965 */ 966 function closeParent(tag) 967 { 968 if (!HINT.closeParent) 969 { 970 return false; 971 } 972 973 if (openTags.length) 974 { 975 var tagName = tag.getName(), 976 tagConfig = tagsConfig[tagName]; 977 978 if (tagConfig.rules.closeParent) 979 { 980 var parent = openTags[openTags.length - 1], 981 parentName = parent.getName(); 982 983 if (tagConfig.rules.closeParent[parentName]) 984 { 985 ++currentFixingCost; 986 987 // We have to close that parent. First we reinsert the tag... 988 tagStack.push(tag); 989 990 // ...then we add a new end tag for it with a better priority 991 addMagicEndTag(parent, tag.getPos(), tag.getSortPriority() - 1); 992 993 return true; 994 } 995 } 996 } 997 998 return false; 999 } 1000 1001 /** 1002 * Apply the createChild rules associated with given tag 1003 * 1004 * @param {!Tag} tag Tag 1005 */ 1006 function createChild(tag) 1007 { 1008 if (!HINT.createChild) 1009 { 1010 return; 1011 } 1012 1013 var tagConfig = tagsConfig[tag.getName()]; 1014 if (tagConfig.rules.createChild) 1015 { 1016 var priority = -1000, 1017 _text = text.substring(pos), 1018 tagPos = pos + _text.length - _text.replace(/^[ \n\r\t]+/, '').length; 1019 tagConfig.rules.createChild.forEach(function(tagName) 1020 { 1021 addStartTag(tagName, tagPos, 0, ++priority); 1022 }); 1023 } 1024 } 1025 1026 /** 1027 * Apply fosterParent rules associated with given tag 1028 * 1029 * NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to 1030 * foster itself or two or more tags try to foster each other in a loop. 
We mitigate the 1031 * risk by preventing a tag from creating a child of itself (the parent still gets closed) 1032 * and by checking and increasing the currentFixingCost so that a loop of multiple tags 1033 * do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the 1034 * loop from running indefinitely 1035 * 1036 * @param {!Tag} tag Tag 1037 * @return {boolean} Whether a new tag has been added 1038 */ 1039 function fosterParent(tag) 1040 { 1041 if (!HINT.fosterParent) 1042 { 1043 return false; 1044 } 1045 1046 if (openTags.length) 1047 { 1048 var tagName = tag.getName(), 1049 tagConfig = tagsConfig[tagName]; 1050 1051 if (tagConfig.rules.fosterParent) 1052 { 1053 var parent = openTags[openTags.length - 1], 1054 parentName = parent.getName(); 1055 1056 if (tagConfig.rules.fosterParent[parentName]) 1057 { 1058 if (parentName !== tagName && currentFixingCost < maxFixingCost) 1059 { 1060 addFosterTag(tag, parent); 1061 } 1062 1063 // Reinsert current tag 1064 tagStack.push(tag); 1065 1066 // And finally close its parent with a priority that ensures it is processed 1067 // before this tag 1068 addMagicEndTag(parent, tag.getPos(), tag.getSortPriority() - 1); 1069 1070 // Adjust the fixing cost to account for the additional tags/processing 1071 currentFixingCost += 4; 1072 1073 return true; 1074 } 1075 } 1076 } 1077 1078 return false; 1079 } 1080 1081 /** 1082 * Apply requireAncestor rules associated with given tag 1083 * 1084 * @param {!Tag} tag Tag 1085 * @return {boolean} Whether this tag has an unfulfilled requireAncestor requirement 1086 */ 1087 function requireAncestor(tag) 1088 { 1089 if (!HINT.requireAncestor) 1090 { 1091 return false; 1092 } 1093 1094 var tagName = tag.getName(), 1095 tagConfig = tagsConfig[tagName]; 1096 1097 if (tagConfig.rules.requireAncestor) 1098 { 1099 var i = tagConfig.rules.requireAncestor.length; 1100 while (--i >= 0) 1101 { 1102 var ancestorName = tagConfig.rules.requireAncestor[i]; 1103 if 
(cntOpen[ancestorName]) 1104 { 1105 return false; 1106 } 1107 } 1108 1109 logger.err('Tag requires an ancestor', { 1110 'requireAncestor' : tagConfig.rules.requireAncestor.join(', '), 1111 'tag' : tag 1112 }); 1113 1114 return true; 1115 } 1116 1117 return false; 1118 } 1119 1120 //========================================================================== 1121 // Tag processing 1122 //========================================================================== 1123 1124 /** 1125 * Create and add a copy of a tag as a child of a given tag 1126 * 1127 * @param {!Tag} tag Current tag 1128 * @param {!Tag} fosterTag Tag to foster 1129 */ 1130 function addFosterTag(tag, fosterTag) 1131 { 1132 var coords = getMagicStartCoords(tag.getPos() + tag.getLen()), 1133 childPos = coords[0], 1134 childPrio = coords[1]; 1135 1136 // Add a 0-width copy of the parent tag after this tag and make it depend on this tag 1137 var childTag = addCopyTag(fosterTag, childPos, 0, childPrio); 1138 tag.cascadeInvalidationTo(childTag); 1139 } 1140 1141 /** 1142 * Create and add an end tag for given start tag at given position 1143 * 1144 * @param {!Tag} startTag Start tag 1145 * @param {number} tagPos End tag's position (will be adjusted for whitespace if applicable) 1146 * @param {number=} prio End tag's priority 1147 * @return {!Tag} 1148 */ 1149 function addMagicEndTag(startTag, tagPos, prio) 1150 { 1151 var tagName = startTag.getName(); 1152 1153 // Adjust the end tag's position if whitespace is to be minimized 1154 if (HINT.RULE_IGNORE_WHITESPACE && ((currentTag.getFlags() | startTag.getFlags()) & RULE_IGNORE_WHITESPACE)) 1155 { 1156 tagPos = getMagicEndPos(tagPos); 1157 } 1158 1159 // Add a 0-width end tag that is paired with the given start tag 1160 var endTag = addEndTag(tagName, tagPos, 0, prio || 0); 1161 endTag.pairWith(startTag); 1162 1163 return endTag; 1164 } 1165 1166 /** 1167 * Compute the position of a magic end tag, adjusted for whitespace 1168 * 1169 * @param {number} tagPos 
Rightmost possible position for the tag
 * @return {number}
 */
function getMagicEndPos(tagPos)
{
    // Back up from given position to the cursor's position until we find a character that
    // is not whitespace
    while (tagPos > pos && WHITESPACE.indexOf(text[tagPos - 1]) > -1)
    {
        --tagPos;
    }

    return tagPos;
}

/**
 * Compute the position and priority of a magic start tag, adjusted for whitespace
 *
 * The tag at the top of the tag stack is used as the "next tag". If the stack is empty, the
 * limit is set past the end of the text instead.
 *
 * @param  {number} tagPos Leftmost possible position for the tag
 * @return {!Array}        [Tag pos, priority]
 */
function getMagicStartCoords(tagPos)
{
    var nextPos, nextPrio, nextTag, prio;
    if (!tagStack.length)
    {
        // Set the next position outside the text boundaries
        nextPos  = textLen + 1;
        nextPrio = 0;
    }
    else
    {
        nextTag  = tagStack[tagStack.length - 1];
        nextPos  = nextTag.getPos();
        nextPrio = nextTag.getSortPriority();
    }

    // Find the first non-whitespace position before next tag or the end of text
    while (tagPos < nextPos && WHITESPACE.indexOf(text[tagPos]) > -1)
    {
        ++tagPos;
    }

    // Set a priority that ensures this tag appears before the next tag
    prio = (tagPos === nextPos) ? nextPrio - 1 : 0;

    return [tagPos, prio];
}

/**
 * Test whether given start tag is immediately followed by a closing tag
 *
 * Only the tag at the top of the tag stack is tested.
 *
 * @param  {!Tag} tag Start tag (including self-closing)
 * @return {boolean}
 */
function isFollowedByClosingTag(tag)
{
    return (!tagStack.length) ? false : tagStack[tagStack.length - 1].canClose(tag);
}

/**
 * Process all tags in the stack
 *
 * Does nothing if the stack is empty. Otherwise the stack is emptied, magic end tags are added
 * for the tags that are still open, and the whole process repeats until the stack stays empty.
 */
function processTags()
{
    if (!tagStack.length)
    {
        return;
    }

    // Initialize the count tables
    for (var tagName in tagsConfig)
    {
        cntOpen[tagName]  = 0;
        cntTotal[tagName] = 0;
    }

    // Process the tag stack, close tags that were left open and repeat until done
    do
    {
        while (tagStack.length)
        {
            // Tags may have been added while the previous tag was processed
            if (!tagStackIsSorted)
            {
                sortTags();
            }

            currentTag = tagStack.pop();
            processCurrentTag();
        }

        // Close tags that were left open
        openTags.forEach(function (startTag)
        {
            // NOTE: we add tags in hierarchical order (ancestors to descendants) but since
            //       the stack is processed in LIFO order, it means that tags get closed in
            //       the correct order, from descendants to ancestors
            addMagicEndTag(startTag, textLen);
        });
    }
    while (tagStack.length);
}

/**
 * Process current tag
 *
 * Operates on the parser-level `currentTag`. The tag is first checked against the current
 * context and the cursor's position, which may invalidate or replace it, then it is
 * dispatched according to its type.
 */
function processCurrentTag()
{
    // Invalidate current tag if tags are disabled and current tag would not close the last open
    // tag and is not a system tag
    // NOTE(review): openTags[openTags.length - 1] is undefined when no tag is open; presumably
    //               canClose() copes with that -- confirm
    if ((context.flags & RULE_IGNORE_TAGS)
     && !currentTag.canClose(openTags[openTags.length - 1])
     && !currentTag.isSystemTag())
    {
        currentTag.invalidate();
    }

    var tagPos = currentTag.getPos(),
        tagLen = currentTag.getLen();

    // Test whether the cursor passed this tag's position already
    if (pos > tagPos && !currentTag.isInvalid())
    {
        // Test whether this tag is paired with a start tag and this tag is still open
        var startTag = currentTag.getStartTag();

        if (startTag && openTags.indexOf(startTag) >= 0)
        {
            // Create an end tag that matches current tag's start tag, which consumes as much of
            // the same text as current tag and is paired with the same start tag
            addEndTag(
                startTag.getName(),
                pos,
                Math.max(0, tagPos + tagLen - pos)
            ).pairWith(startTag);

            // Note that current tag is not invalidated, it's merely replaced
            return;
        }

        // If this is an ignore tag, try to ignore as much as the remaining text as possible
        if (currentTag.isIgnoreTag())
        {
            var ignoreLen = tagPos + tagLen - pos;

            if (ignoreLen > 0)
            {
                // Create a new ignore tag and move on
                addIgnoreTag(pos, ignoreLen);

                return;
            }
        }

        // Skipped tags are invalidated
        currentTag.invalidate();
    }

    if (currentTag.isInvalid())
    {
        return;
    }

    // Dispatch on the type of tag
    if (currentTag.isIgnoreTag())
    {
        outputIgnoreTag(currentTag);
    }
    else if (currentTag.isBrTag())
    {
        // Output the tag if it's allowed, ignore it otherwise
        if (!HINT.RULE_PREVENT_BR || !(context.flags & RULE_PREVENT_BR))
        {
            outputBrTag(currentTag);
        }
    }
    else if (currentTag.isParagraphBreak())
    {
        outputText(currentTag.getPos(), 0, true);
    }
    else if (currentTag.isVerbatim())
    {
        outputVerbatim(currentTag);
    }
    else if (currentTag.isStartTag())
    {
        processStartTag(currentTag);
    }
    else
    {
        processEndTag(currentTag);
    }
}

/**
 * Process given start tag (including self-closing tags) at current position
 *
 * @param {!Tag} tag Start tag (including self-closing)
 */
function processStartTag(tag)
{
    var tagName   = tag.getName(),
        tagConfig = tagsConfig[tagName];

    // 1. Check that this tag has not reached its global limit tagLimit
    // 2. Execute this tag's filterChain, which will filter/validate its attributes
    // 3. Apply closeParent, closeAncestor and fosterParent rules
    // 4. Check for nestingLimit
    // 5. Apply requireAncestor rules
    //
    // This order ensures that the tag is valid and within the set limits before we attempt to
    // close parents or ancestors. We need to close ancestors before we can check for nesting
    // limits, whether this tag is allowed within current context (the context may change
    // as ancestors are closed) or whether the required ancestors are still there (they might
    // have been closed by a rule.)
    if (cntTotal[tagName] >= tagConfig.tagLimit)
    {
        logger.err(
            'Tag limit exceeded',
            {
                'tag'      : tag,
                'tagName'  : tagName,
                'tagLimit' : tagConfig.tagLimit
            }
        );
        tag.invalidate();

        return;
    }

    filterTag(tag);
    if (tag.isInvalid())
    {
        return;
    }

    // Rules that fix markup are only applied while the fixing cost is under its limit
    if (currentFixingCost < maxFixingCost)
    {
        if (fosterParent(tag) || closeParent(tag) || closeAncestor(tag))
        {
            // This tag parent/ancestor needs to be closed, we just return (the tag is still valid)
            return;
        }
    }

    if (cntOpen[tagName] >= tagConfig.nestingLimit)
    {
        logger.err(
            'Nesting limit exceeded',
            {
                'tag'          : tag,
                'tagName'      : tagName,
                'nestingLimit' : tagConfig.nestingLimit
            }
        );
        tag.invalidate();

        return;
    }

    if (!tagIsAllowed(tagName))
    {
        // NOTE(review): this `var context` is hoisted to the top of processStartTag and shadows
        //               the parser-level `context` for the whole function -- confirm that the
        //               rest of the function never needs the parser's context
        var msg     = 'Tag is not allowed in this context',
            context = {'tag': tag, 'tagName': tagName};
        // Tags that consume text are reported as warnings, 0-width tags as debug messages
        if (tag.getLen() > 0)
        {
            logger.warn(msg, context);
        }
        else
        {
            logger.debug(msg, context);
        }
        tag.invalidate();

        return;
    }

    if (requireAncestor(tag))
    {
        tag.invalidate();

        return;
    }

    // If this tag has an autoClose rule and it's not self-closed, paired with an end tag, or
    // immediately followed by an end tag, we replace it with a self-closing tag with the same
    // properties
    if (HINT.RULE_AUTO_CLOSE
     && tag.getFlags() & 
RULE_AUTO_CLOSE 1457 && !tag.isSelfClosingTag() 1458 && !tag.getEndTag() 1459 && !isFollowedByClosingTag(tag)) 1460 { 1461 var newTag = new Tag(Tag.SELF_CLOSING_TAG, tagName, tag.getPos(), tag.getLen()); 1462 newTag.setAttributes(tag.getAttributes()); 1463 newTag.setFlags(tag.getFlags()); 1464 1465 tag = newTag; 1466 } 1467 1468 if (HINT.RULE_TRIM_FIRST_LINE 1469 && tag.getFlags() & RULE_TRIM_FIRST_LINE 1470 && text[tag.getPos() + tag.getLen()] === "\n") 1471 { 1472 addIgnoreTag(tag.getPos() + tag.getLen(), 1); 1473 } 1474 1475 // This tag is valid, output it and update the context 1476 outputTag(tag); 1477 pushContext(tag); 1478 1479 // Apply the createChild rules if applicable 1480 createChild(tag); 1481 } 1482 1483 /** 1484 * Process given end tag at current position 1485 * 1486 * @param {!Tag} tag End tag 1487 */ 1488 function processEndTag(tag) 1489 { 1490 var tagName = tag.getName(); 1491 1492 if (!cntOpen[tagName]) 1493 { 1494 // This is an end tag with no start tag 1495 return; 1496 } 1497 1498 /** 1499 * @type {!Array.<!Tag>} List of tags need to be closed before given tag 1500 */ 1501 var closeTags = []; 1502 1503 // Iterate through all open tags from last to first to find a match for our tag 1504 var i = openTags.length; 1505 while (--i >= 0) 1506 { 1507 var openTag = openTags[i]; 1508 1509 if (tag.canClose(openTag)) 1510 { 1511 break; 1512 } 1513 1514 closeTags.push(openTag); 1515 ++currentFixingCost; 1516 } 1517 1518 if (i < 0) 1519 { 1520 // Did not find a matching tag 1521 logger.debug('Skipping end tag with no start tag', {'tag': tag}); 1522 1523 return; 1524 } 1525 1526 // Accumulate flags to determine whether whitespace should be trimmed 1527 var flags = tag.getFlags(); 1528 closeTags.forEach(function(openTag) 1529 { 1530 flags |= openTag.getFlags(); 1531 }); 1532 var ignoreWhitespace = (HINT.RULE_IGNORE_WHITESPACE && (flags & RULE_IGNORE_WHITESPACE)); 1533 1534 // Only reopen tags if we haven't exceeded our "fixing" budget 1535 var keepReopening 
= HINT.RULE_AUTO_REOPEN && (currentFixingCost < maxFixingCost), 1536 reopenTags = []; 1537 closeTags.forEach(function(openTag) 1538 { 1539 var openTagName = openTag.getName(); 1540 1541 // Test whether this tag should be reopened automatically 1542 if (keepReopening) 1543 { 1544 if (openTag.getFlags() & RULE_AUTO_REOPEN) 1545 { 1546 reopenTags.push(openTag); 1547 } 1548 else 1549 { 1550 keepReopening = false; 1551 } 1552 } 1553 1554 // Find the earliest position we can close this open tag 1555 var tagPos = tag.getPos(); 1556 if (ignoreWhitespace) 1557 { 1558 tagPos = getMagicEndPos(tagPos); 1559 } 1560 1561 // Output an end tag to close this start tag, then update the context 1562 var endTag = new Tag(Tag.END_TAG, openTagName, tagPos, 0); 1563 endTag.setFlags(openTag.getFlags()); 1564 outputTag(endTag); 1565 popContext(); 1566 }); 1567 1568 // Output our tag, moving the cursor past it, then update the context 1569 outputTag(tag); 1570 popContext(); 1571 1572 // If our fixing budget allows it, peek at upcoming tags and remove end tags that would 1573 // close tags that are already being closed now. 
Also, filter our list of tags being 1574 // reopened by removing those that would immediately be closed 1575 if (closeTags.length && currentFixingCost < maxFixingCost) 1576 { 1577 /** 1578 * @type {number} Rightmost position of the portion of text to ignore 1579 */ 1580 var ignorePos = pos; 1581 1582 i = tagStack.length; 1583 while (--i >= 0 && ++currentFixingCost < maxFixingCost) 1584 { 1585 var upcomingTag = tagStack[i]; 1586 1587 // Test whether the upcoming tag is positioned at current "ignore" position and it's 1588 // strictly an end tag (not a start tag or a self-closing tag) 1589 if (upcomingTag.getPos() > ignorePos 1590 || upcomingTag.isStartTag()) 1591 { 1592 break; 1593 } 1594 1595 // Test whether this tag would close any of the tags we're about to reopen 1596 var j = closeTags.length; 1597 1598 while (--j >= 0 && ++currentFixingCost < maxFixingCost) 1599 { 1600 if (upcomingTag.canClose(closeTags[j])) 1601 { 1602 // Remove the tag from the lists and reset the keys 1603 closeTags.splice(j, 1); 1604 1605 if (reopenTags[j]) 1606 { 1607 reopenTags.splice(j, 1); 1608 } 1609 1610 // Extend the ignored text to cover this tag 1611 ignorePos = Math.max( 1612 ignorePos, 1613 upcomingTag.getPos() + upcomingTag.getLen() 1614 ); 1615 1616 break; 1617 } 1618 } 1619 } 1620 1621 if (ignorePos > pos) 1622 { 1623 /** 1624 * @todo have a method that takes (pos,len) rather than a Tag 1625 */ 1626 outputIgnoreTag(new Tag(Tag.SELF_CLOSING_TAG, 'i', pos, ignorePos - pos)); 1627 } 1628 } 1629 1630 // Re-add tags that need to be reopened, at current cursor position 1631 reopenTags.forEach(function(startTag) 1632 { 1633 var newTag = addCopyTag(startTag, pos, 0); 1634 1635 // Re-pair the new tag 1636 var endTag = startTag.getEndTag(); 1637 if (endTag) 1638 { 1639 newTag.pairWith(endTag); 1640 } 1641 }); 1642 } 1643 1644 /** 1645 * Update counters and replace current context with its parent context 1646 */ 1647 function popContext() 1648 { 1649 var tag = openTags.pop(); 1650 
--cntOpen[tag.getName()]; 1651 context = context.parentContext; 1652 } 1653 1654 /** 1655 * Update counters and replace current context with a new context based on given tag 1656 * 1657 * If given tag is a self-closing tag, the context won't change 1658 * 1659 * @param {!Tag} tag Start tag (including self-closing) 1660 */ 1661 function pushContext(tag) 1662 { 1663 var tagName = tag.getName(), 1664 tagFlags = tag.getFlags(), 1665 tagConfig = tagsConfig[tagName]; 1666 1667 ++cntTotal[tagName]; 1668 1669 // If this is a self-closing tag, the context remains the same 1670 if (tag.isSelfClosingTag()) 1671 { 1672 return; 1673 } 1674 1675 // Recompute the allowed tags 1676 var allowed = []; 1677 context.allowed.forEach(function(v, k) 1678 { 1679 // If the current tag is not transparent, override the low bits (allowed children) of 1680 // current context with its high bits (allowed descendants) 1681 if (!HINT.RULE_IS_TRANSPARENT || !(tagFlags & RULE_IS_TRANSPARENT)) 1682 { 1683 v = (v & 0xFF00) | (v >> 8); 1684 } 1685 allowed.push(tagConfig.allowed[k] & v); 1686 }); 1687 1688 // Use this tag's flags as a base for this context and add inherited rules 1689 var flags = tagFlags | (context.flags & RULES_INHERITANCE); 1690 1691 // RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR 1692 if (flags & RULE_DISABLE_AUTO_BR) 1693 { 1694 flags &= ~RULE_ENABLE_AUTO_BR; 1695 } 1696 1697 ++cntOpen[tagName]; 1698 openTags.push(tag); 1699 context = { parentContext : context }; 1700 context.allowed = allowed; 1701 context.flags = flags; 1702 } 1703 1704 /** 1705 * Return whether given tag is allowed in current context 1706 * 1707 * @param {string} tagName 1708 * @return {boolean} 1709 */ 1710 function tagIsAllowed(tagName) 1711 { 1712 var n = tagsConfig[tagName].bitNumber; 1713 1714 return !!(context.allowed[n >> 3] & (1 << (n & 7))); 1715 } 1716 1717 //========================================================================== 1718 // Tag stack 1719 
//========================================================================== 1720 1721 /** 1722 * Add a start tag 1723 * 1724 * @param {string} name Name of the tag 1725 * @param {number} pos Position of the tag in the text 1726 * @param {number} len Length of text consumed by the tag 1727 * @param {number=} prio Tags' priority 1728 * @return {!Tag} 1729 */ 1730 function addStartTag(name, pos, len, prio) 1731 { 1732 return addTag(Tag.START_TAG, name, pos, len, prio || 0); 1733 } 1734 1735 /** 1736 * Add an end tag 1737 * 1738 * @param {string} name Name of the tag 1739 * @param {number} pos Position of the tag in the text 1740 * @param {number} len Length of text consumed by the tag 1741 * @param {number=} prio Tags' priority 1742 * @return {!Tag} 1743 */ 1744 function addEndTag(name, pos, len, prio) 1745 { 1746 return addTag(Tag.END_TAG, name, pos, len, prio || 0); 1747 } 1748 1749 /** 1750 * Add a self-closing tag 1751 * 1752 * @param {string} name Name of the tag 1753 * @param {number} pos Position of the tag in the text 1754 * @param {number} len Length of text consumed by the tag 1755 * @param {number=} prio Tags' priority 1756 * @return {!Tag} 1757 */ 1758 function addSelfClosingTag(name, pos, len, prio) 1759 { 1760 return addTag(Tag.SELF_CLOSING_TAG, name, pos, len, prio || 0); 1761 } 1762 1763 /** 1764 * Add a 0-width "br" tag to force a line break at given position 1765 * 1766 * @param {number} pos Position of the tag in the text 1767 * @param {number=} prio Tags' priority 1768 * @return {!Tag} 1769 */ 1770 function addBrTag(pos, prio) 1771 { 1772 return addTag(Tag.SELF_CLOSING_TAG, 'br', pos, 0, prio || 0); 1773 } 1774 1775 /** 1776 * Add an "ignore" tag 1777 * 1778 * @param {number} pos Position of the tag in the text 1779 * @param {number} len Length of text consumed by the tag 1780 * @param {number=} prio Tags' priority 1781 * @return {!Tag} 1782 */ 1783 function addIgnoreTag(pos, len, prio) 1784 { 1785 return addTag(Tag.SELF_CLOSING_TAG, 'i', pos, 
Math.min(len, textLen - pos), prio || 0); 1786 } 1787 1788 /** 1789 * Add a paragraph break at given position 1790 * 1791 * Uses a zero-width tag that is actually never output in the result 1792 * 1793 * @param {number} pos Position of the tag in the text 1794 * @param {number=} prio Tags' priority 1795 * @return {!Tag} 1796 */ 1797 function addParagraphBreak(pos, prio) 1798 { 1799 return addTag(Tag.SELF_CLOSING_TAG, 'pb', pos, 0, prio || 0); 1800 } 1801 1802 /** 1803 * Add a copy of given tag at given position and length 1804 * 1805 * @param {!Tag} tag Original tag 1806 * @param {number} pos Copy's position 1807 * @param {number} len Copy's length 1808 * @param {number=} prio Tags' priority 1809 * @return {!Tag} Copy tag 1810 */ 1811 function addCopyTag(tag, pos, len, prio) 1812 { 1813 var copy = addTag(tag.getType(), tag.getName(), pos, len, tag.getSortPriority()); 1814 copy.setAttributes(tag.getAttributes()); 1815 1816 return copy; 1817 } 1818 1819 /** 1820 * Add a tag 1821 * 1822 * @param {number} type Tag's type 1823 * @param {string} name Name of the tag 1824 * @param {number} pos Position of the tag in the text 1825 * @param {number} len Length of text consumed by the tag 1826 * @param {number=} prio Tags' priority 1827 * @return {!Tag} 1828 */ 1829 function addTag(type, name, pos, len, prio) 1830 { 1831 // Create the tag 1832 var tag = new Tag(type, name, pos, len, prio || 0); 1833 1834 // Set this tag's rules bitfield 1835 if (tagsConfig[name]) 1836 { 1837 tag.setFlags(tagsConfig[name].rules.flags); 1838 } 1839 1840 // Invalidate this tag if it's an unknown tag, a disabled tag, if either of its length or 1841 // position is negative or if it's out of bounds 1842 if ((!tagsConfig[name] && !tag.isSystemTag()) || isInvalidTextSpan(pos, len)) 1843 { 1844 tag.invalidate(); 1845 } 1846 else if (tagsConfig[name] && tagsConfig[name].isDisabled) 1847 { 1848 logger.warn( 1849 'Tag is disabled', 1850 { 1851 'tag' : tag, 1852 'tagName' : name 1853 } 1854 ); 1855 
tag.invalidate(); 1856 } 1857 else 1858 { 1859 insertTag(tag); 1860 } 1861 1862 return tag; 1863 } 1864 1865 /** 1866 * Test whether given text span is outside text boundaries or an invalid UTF sequence 1867 * 1868 * @param {number} pos Start of text 1869 * @param {number} len Length of text 1870 * @return {boolean} 1871 */ 1872 function isInvalidTextSpan(pos, len) 1873 { 1874 return (len < 0 || pos < 0 || pos + len > textLen || /[\uDC00-\uDFFF]/.test(text.substring(pos, pos + 1) + text.substring(pos + len, pos + len + 1))); 1875 } 1876 1877 /** 1878 * Insert given tag in the tag stack 1879 * 1880 * @param {!Tag} tag 1881 */ 1882 function insertTag(tag) 1883 { 1884 if (!tagStackIsSorted) 1885 { 1886 tagStack.push(tag); 1887 } 1888 else 1889 { 1890 // Scan the stack and copy every tag to the next slot until we find the correct index 1891 var i = tagStack.length, 1892 key = getSortKey(tag); 1893 while (i > 0 && key > getSortKey(tagStack[i - 1])) 1894 { 1895 tagStack[i] = tagStack[i - 1]; 1896 --i; 1897 } 1898 tagStack[i] = tag; 1899 } 1900 } 1901 1902 /** 1903 * Add a pair of tags 1904 * 1905 * @param {string} name Name of the tags 1906 * @param {number} startPos Position of the start tag 1907 * @param {number} startLen Length of the start tag 1908 * @param {number} endPos Position of the start tag 1909 * @param {number} endLen Length of the start tag 1910 * @param {number=} prio Start tag's priority (the end tag will be set to minus that value) 1911 * @return {!Tag} Start tag 1912 */ 1913 function addTagPair(name, startPos, startLen, endPos, endLen, prio) 1914 { 1915 // NOTE: the end tag is added first to try to keep the stack in the correct order 1916 var endTag = addEndTag(name, endPos, endLen, -prio || 0), 1917 startTag = addStartTag(name, startPos, startLen, prio || 0); 1918 startTag.pairWith(endTag); 1919 1920 return startTag; 1921 } 1922 1923 /** 1924 * Add a tag that represents a verbatim copy of the original text 1925 * 1926 * @param {number} pos Position of 
the tag in the text 1927 * @param {number} len Length of text consumed by the tag 1928 * @param {number=} prio Tag's priority 1929 * @return {!Tag} 1930 */ 1931 function addVerbatim(pos, len, prio) 1932 { 1933 return addTag(Tag.SELF_CLOSING_TAG, 'v', pos, len, prio || 0); 1934 } 1935 1936 /** 1937 * Sort tags by position and precedence 1938 */ 1939 function sortTags() 1940 { 1941 var arr = {}, 1942 keys = [], 1943 i = tagStack.length; 1944 while (--i >= 0) 1945 { 1946 var tag = tagStack[i], 1947 key = getSortKey(tag, i); 1948 keys.push(key); 1949 arr[key] = tag; 1950 } 1951 keys.sort(); 1952 1953 i = keys.length; 1954 tagStack = []; 1955 while (--i >= 0) 1956 { 1957 tagStack.push(arr[keys[i]]); 1958 } 1959 1960 tagStackIsSorted = true; 1961 } 1962 1963 /** 1964 * Generate a key for given tag that can be used to compare its position using lexical comparisons 1965 * 1966 * Tags are sorted by position first, then by priority, then by whether they consume any text, 1967 * then by length, and finally in order of their creation. 1968 * 1969 * The stack's array is in reverse order. Therefore, tags that appear at the start of the text 1970 * are at the end of the array. 
1971 * 1972 * @param {!Tag} tag 1973 * @param {number=} tagIndex 1974 * @return {string} 1975 */ 1976 function getSortKey(tag, tagIndex) 1977 { 1978 // Ensure that negative values are sorted correctly by flagging them and making them positive 1979 var prioFlag = (tag.getSortPriority() >= 0), 1980 prio = tag.getSortPriority(); 1981 if (!prioFlag) 1982 { 1983 prio += (1 << 30); 1984 } 1985 1986 // Sort 0-width tags separately from the rest 1987 var lenFlag = (tag.getLen() > 0), 1988 lenOrder; 1989 if (lenFlag) 1990 { 1991 // Inverse their length so that longest matches are processed first 1992 lenOrder = textLen - tag.getLen(); 1993 } 1994 else 1995 { 1996 // Sort self-closing tags in-between start tags and end tags to keep them outside of tag 1997 // pairs 1998 var order = {}; 1999 order[Tag.END_TAG] = 0; 2000 order[Tag.SELF_CLOSING_TAG] = 1; 2001 order[Tag.START_TAG] = 2; 2002 lenOrder = order[tag.getType()]; 2003 } 2004 2005 return hex32(tag.getPos()) + (+prioFlag) + hex32(prio) + (+lenFlag) + hex32(lenOrder) + hex32(tagIndex || 0); 2006 } 2007 2008 /** 2009 * Format given number to a 32 bit hex value 2010 * 2011 * @param {number} number 2012 * @return {string} 2013 */ 2014 function hex32(number) 2015 { 2016 var hex = number.toString(16); 2017 2018 return " ".substring(hex.length) + hex; 2019 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |