element is only part of the
     * "interactive content" category if it has a "usemap" attribute. Those restrictions are
     * expressed as an XPath expression and stored using the concatenation of the key of the bitfield
     * plus the bit number of the category. For instance, if "interactive content" got assigned to
     * bit 2, the definition of the element will contain a key "c2" with value "@usemap".
     *
     * Additionally, other flags are set:
     *
     *  "t"   indicates that the element uses the "transparent" content model.
     *  "e"   indicates that the element uses the "empty" content model.
     *  "v"   indicates that the element is a void element.
     *  "nt"  indicates that the element does not accept text nodes. (no text)
     *  "to"  indicates that the element should only contain text. (text-only)
     *  "fe"  indicates that the element is a formatting element. It will automatically be reopened
     *        when closed by an end tag of a different name.
     *  "b"   indicates that the element is not phrasing content, which makes it likely to act like
     *        a block element.
     *  "pre" indicates that the element preserves whitespace in its content.
     *
     * Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
     * predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
*
     * @var array
     * @see /scripts/patchElementInspector.php
     */
    protected static $htmlElements = [
        'a'=>['c'=>"\17\0\0\0\200",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\200",'t'=>1,'fe'=>1],
        'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'address'=>['c'=>"\3\20",'ac'=>"\1",'dd'=>"\100\24",'b'=>1,'cp'=>['p']],
        'area'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'audio'=>['c'=>"\57\0\10",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\220",'ac28'=>'not(@src)','dd'=>"\0\0\10",'t'=>1],
        'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'blockquote'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'body'=>['c'=>"\0\0\20",'ac'=>"\1",'dd'=>"\0",'b'=>1],
        'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'button'=>['c'=>"\217",'ac'=>"\4",'dd'=>"\10"],
        'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\10",'t'=>1],
        'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\0\1",'b'=>1],
        'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'col'=>['c'=>"\0\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\100",'ac22'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e?'=>'@span','b'=>1],
        'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\40",'dd'=>"\0"],
        'dd'=>['c'=>"\0\100\0\1",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
        'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'details'=>['c'=>"\13",'ac'=>"\1\0\0\10",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'dfn'=>['c'=>"\7\0\0\0\20",'ac'=>"\4",'dd'=>"\0\0\0\0\20"],
        'dialog'=>['c'=>"\1",'ac'=>"\1",'dd'=>"\0",'b'=>1],
        'div'=>['c'=>"\3\100",'ac'=>"\1\200\0\1",'ac0'=>'not(ancestor::dl)','dd'=>"\0",'b'=>1,'cp'=>['p']],
        'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\300",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'dt'=>['c'=>"\0\100\0\1",'ac'=>"\1",'dd'=>"\100\4\4",'b'=>1,'cp'=>['dd','dt']],
        'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'fieldset'=>['c'=>"\203",'ac'=>"\1\0\0\100",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'figcaption'=>['c'=>"\0\0\0\0\0\1",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'figure'=>['c'=>"\3",'ac'=>"\1\0\0\0\0\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'footer'=>['c'=>"\3\20\4",'ac'=>"\1",'dd'=>"\0\0\4",'b'=>1,'cp'=>['p']],
        'form'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
        'h1'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h2'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h3'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h4'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h5'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h6'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'head'=>['c'=>"\0\0\20",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
        'header'=>['c'=>"\3\20\4",'ac'=>"\1",'dd'=>"\0\0\4",'b'=>1,'cp'=>['p']],
        'hgroup'=>['c'=>"\103",'ac'=>"\0\201",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'hr'=>['c'=>"\1\40",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
        'html'=>['c'=>"\0",'ac'=>"\0\0\20",'dd'=>"\0",'nt'=>1,'b'=>1],
        'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
        'img'=>['c'=>"\257\0\40",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'input'=>['c'=>"\217",'c3'=>'@type!="hidden"','c7'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'label'=>['c'=>"\17\0\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
        'legend'=>['c'=>"\0\0\0\100",'ac'=>"\104",'dd'=>"\0",'b'=>1],
        'li'=>['c'=>"\0\0\0\0\100",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
        'link'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'main'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'map'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'menu'=>['c'=>"\3\40",'c1'=>'li','ac'=>"\0\240",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'menuitem'=>['c'=>"\0\40",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
        'meta'=>['c'=>"\25",'c0'=>'@itemprop','c2'=>'@itemprop','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
        'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
        'object'=>['c'=>"\47\0\0\2",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
        'option'=>['c'=>"\0\0\2\40",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
        'output'=>['c'=>"\207",'ac'=>"\4",'dd'=>"\0"],
        'p'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'picture'=>['c'=>"\45",'ac'=>"\0\200\40",'dd'=>"\0",'nt'=>1],
        'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
        'progress'=>['c'=>"\7\0\1\4",'ac'=>"\4",'dd'=>"\0\0\0\4"],
        'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'rb'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
        'rp'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1,'cp'=>['rp','rt']],
        'rt'=>['c'=>"\0\10",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
        'rtc'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
        'ruby'=>['c'=>"\7",'ac'=>"\4\10",'dd'=>"\0"],
        's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'script'=>['c'=>"\25\200",'ac'=>"\0",'dd'=>"\0",'to'=>1],
        'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'select'=>['c'=>"\217",'ac'=>"\0\200\2",'dd'=>"\0",'nt'=>1],
        'slot'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'source'=>['c'=>"\0\0\40\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'style'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
        'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'summary'=>['c'=>"\0\0\0\10",'ac'=>"\104",'dd'=>"\0",'b'=>1],
        'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'table'=>['c'=>"\3\0\0\0\1",'ac'=>"\0\202",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
        'td'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['td','th']],
        'template'=>['c'=>"\25\200\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
        'textarea'=>['c'=>"\217",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
        'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
        'th'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\100\4\4",'b'=>1,'cp'=>['td','th']],
        'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1],
        'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
        'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
        'tr'=>['c'=>"\0\2\0\0\40",'ac'=>"\0\200\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
        'track'=>['c'=>"\0\0\0\200",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'video'=>['c'=>"\57\0\10",'c3'=>'@controls','ac'=>"\0\0\0\220",'ac28'=>'not(@src)','dd'=>"\0\0\10",'t'=>1],
        'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
    ];

    /**
     * Tell whether opening the child element implicitly closes the parent element
     *
     * The answer comes from the child's "cp" list in self::$htmlElements: the parent is closed
     * if its node name appears in that list.
     *
     * @param  DOMElement $child  Element being opened
     * @param  DOMElement $parent Element that is currently open
     * @return bool               True if the parent's name is listed in the child's "cp" entry
     */
    public static function closesParent(DOMElement $child, DOMElement $parent)
    {
        // Elements that are unknown, or that have no "cp" entry, close nothing
        $closedNames = self::$htmlElements[$child->nodeName]['cp'] ?? [];

        return in_array($parent->nodeName, $closedNames, true);
    }

    /**
     * Tell whether the element carries the "nt" (no text) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function disallowsText(DOMElement $element)
    {
        return self::hasProperty($element, 'nt');
    }

    /**
     * Compute the "ac" (allowChild) bitfield of the element, with conditional bits resolved
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getAllowChildBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'ac');
    }

    /**
     * Compute the "c" (category) bitfield of the element, with conditional bits resolved
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getCategoryBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'c');
    }

    /**
     * Compute the "dd" (denyDescendant) bitfield of the element, with conditional bits resolved
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getDenyDescendantBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'dd');
    }

    /**
     * Tell whether the element carries the "b" (block) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isBlock(DOMElement $element)
    {
        return self::hasProperty($element, 'b');
    }

    /**
     * Tell whether the element carries the "e" (empty content model) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isEmpty(DOMElement $element)
    {
        return self::hasProperty($element, 'e');
    }

    /**
     * Tell whether the element carries the "fe" (formatting element) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isFormattingElement(DOMElement $element)
    {
        return self::hasProperty($element, 'fe');
    }

    /**
     * Tell whether the element carries the "to" (text-only) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isTextOnly(DOMElement $element)
    {
        return self::hasProperty($element, 'to');
    }

    /**
     * Tell whether the element carries the "t" (transparent content model) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isTransparent(DOMElement $element)
    {
        return self::hasProperty($element, 't');
    }

    /**
     * Tell whether the element carries the "v" (void element) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isVoid(DOMElement $element)
    {
        return self::hasProperty($element, 'v');
    }

    /**
     * Tell whether the element carries the "pre" (whitespace is preserved) flag
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function preservesWhitespace(DOMElement $element)
    {
        return self::hasProperty($element, 'pre');
    }

    /**
     * Run an XPath expression as a boolean test against an element
     *
     * The expression is wrapped in boolean() and evaluated with the element as context node,
     * using a DOMXPath instance created for the element's owner document.
     *
     * @param  string     $query   XPath expression
     * @param  DOMElement $element Context node
     * @return bool
     */
    protected static function evaluate($query, DOMElement $element)
    {
        $expression = 'boolean(' . $query . ')';

        return (new DOMXPath($element->ownerDocument))->evaluate($expression, $element);
    }

    /**
     * Build one of the element's bitfields, clearing the bits whose condition is not met
     *
     * A bit at position N of bitfield $name is conditional when the element's definition has a
     * key made of $name followed by N (e.g. "c3"). The value of that key is an XPath expression
     * that must hold true for the element, otherwise the bit is reset.
     *
     * @param  DOMElement $element Context node
     * @param  string     $name    Bitfield name: either 'c', 'ac' or 'dd'
     * @return string              Raw bitfield
     */
    protected static function getBitfield(DOMElement $element, $name)
    {
        $definition = self::getProperties($element);
        $bits       = self::toBin($definition[$name]);

        for ($position = 0, $length = strlen($bits); $position < $length; ++$position) {
            // Only bits that are currently set can be turned off
            if ($bits[$position] === '0') {
                continue;
            }

            $conditionKey = $name . $position;
            if (!isset($definition[$conditionKey])) {
                continue;
            }

            if (!self::evaluate($definition[$conditionKey], $element)) {
                $bits[$position] = '0';
            }
        }

        return self::toRaw($bits);
    }

    /**
     * Look up the definition of an element in self::$htmlElements
     *
     * Elements whose node name is not in the table get the definition of "span".
     *
     * @param  DOMElement $element
     * @return array
     */
    protected static function getProperties(DOMElement $element)
    {
        $elName = $element->nodeName;
        if (isset(self::$htmlElements[$elName])) {
            return self::$htmlElements[$elName];
        }

        return self::$htmlElements['span'];
    }

    /**
     * Tell whether a flag applies to an element in its current context
     *
     * The flag must be set to a non-empty value in the element's definition. If the definition
     * also holds a key made of the flag's name followed by "?", its value is an XPath expression
     * that must evaluate to true for the element.
     *
     * @param  DOMElement $element  Context node
     * @param  string     $propName Property name, see self::$htmlElements
     * @return bool
     */
    protected static function hasProperty(DOMElement $element, $propName)
    {
        $definition = self::getProperties($element);
        if (empty($definition[$propName])) {
            return false;
        }

        $conditionKey = $propName . '?';
        if (!isset($definition[$conditionKey])) {
            return true;
        }

        return (bool) self::evaluate($definition[$conditionKey], $element);
    }

    /**
     * Expand a raw string into a string of 0s and 1s, least significant bit first
     *
     * Each byte of the input becomes 8 characters in the output.
     *
     * @param  string $raw
     * @return string
     */
    protected static function toBin($raw)
    {
        $octets = array_map(
            static function ($byte) {
                // Zero-padded to 8 digits, then reversed to put the lowest bit first
                return strrev(sprintf('%08b', ord($byte)));
            },
            str_split($raw, 1)
        );

        return implode('', $octets);
    }

    /**
     * Pack a string of 0s and 1s, least significant bit first, back into a raw string
     *
     * The input is consumed in groups of 8 characters, each group producing one byte.
     *
     * @param  string $bin
     * @return string
     */
    protected static function toRaw($bin)
    {
        $raw = '';
        foreach (str_split($bin, 8) as $octet) {
            $raw .= chr(bindec(strrev($octet)));
        }

        return $raw;
    }
}