[ Index ] |
PHP Cross Reference of phpBB-3.3.12-deutsch |
[Summary view] [Print] [Text view]
<?php

/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use DOMElement;
use DOMXPath;

class ElementInspector
{
	/**
	* Abridged, slightly liberal rendition of the HTML5 content models and rules.
	*
	* Every element carries up to three bitfields, "c", "ac" and "dd". A bitfield is a raw byte
	* string written with octal escapes so that the source stays plain ASCII. Bit 0 is the least
	* significant bit of the first byte.
	*
	*  - "c"  lists the categories the element belongs to. Categories are the HTML5 content
	*         models (e.g. "phrasing content", "interactive content") plus a few ad hoc groups
	*         for the places where the specs talk about "a group of X and Y elements" instead
	*         of a named content model.
	*  - "ac" lists the categories that are allowed as children of the element.
	*  - "dd" lists the categories that must not appear anywhere among its descendants.
	*
	* Some bits only apply under a condition. For example, <img> is "interactive content" only
	* when it has a "usemap" attribute. Such a condition is an XPath expression stored under the
	* bitfield key immediately followed by the bit number: if "interactive content" is bit 2,
	* <img> would carry a key "c2" whose value is "@usemap". The bit is kept only if the
	* expression evaluates to true with the element as context node.
	*
	* Boolean flags:
	*
	*  - "t"   the element uses the "transparent" content model
	*  - "e"   the element uses the "empty" content model
	*  - "v"   the element is a void element
	*  - "nt"  the element does not accept text nodes (no text)
	*  - "to"  the element should only contain text (text-only)
	*  - "fe"  the element is a formatting element; it is automatically reopened when closed by
	*          an end tag of a different name
	*  - "b"   the element is not phrasing content, which makes it likely to behave like a
	*          block element
	*  - "pre" the element preserves whitespace in its content
	*
	* A flag may itself be conditional: the flag name followed by "?" (e.g. "e?") holds an XPath
	* expression that must evaluate to true for the flag to apply.
	*
	* Lastly, "cp" holds the names of the parent elements that this element implicitly closes.
	* It is derived from HTML5's "optional end tag" rules and used to generate closeParent rules.
	*
	* @var array
	* @see /scripts/patchElementInspector.php
	*/
	protected static $htmlElements = [
		'a'=>['c'=>"\17\0\0\0\200",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\200",'t'=>1,'fe'=>1],
		'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'address'=>['c'=>"\3\20",'ac'=>"\1",'dd'=>"\100\24",'b'=>1,'cp'=>['p']],
		'area'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'audio'=>['c'=>"\57\0\10",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\220",'ac28'=>'not(@src)','dd'=>"\0\0\10",'t'=>1],
		'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
		'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'blockquote'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'body'=>['c'=>"\0\0\20",'ac'=>"\1",'dd'=>"\0",'b'=>1],
		'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'button'=>['c'=>"\217",'ac'=>"\4",'dd'=>"\10"],
		'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\10",'t'=>1],
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\0\1",'b'=>1],
		'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'col'=>['c'=>"\0\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\100",'ac22'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e?'=>'@span','b'=>1],
		'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\40",'dd'=>"\0"],
		'dd'=>['c'=>"\0\100\0\1",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
		'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
		'details'=>['c'=>"\13",'ac'=>"\1\0\0\10",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'dfn'=>['c'=>"\7\0\0\0\20",'ac'=>"\4",'dd'=>"\0\0\0\0\20"],
		'dialog'=>['c'=>"\1",'ac'=>"\1",'dd'=>"\0",'b'=>1],
		'div'=>['c'=>"\3\100",'ac'=>"\1\200\0\1",'ac0'=>'not(ancestor::dl)','dd'=>"\0",'b'=>1,'cp'=>['p']],
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\300",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'dt'=>['c'=>"\0\100\0\1",'ac'=>"\1",'dd'=>"\100\4\4",'b'=>1,'cp'=>['dd','dt']],
		'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'fieldset'=>['c'=>"\203",'ac'=>"\1\0\0\100",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'figcaption'=>['c'=>"\0\0\0\0\0\1",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'figure'=>['c'=>"\3",'ac'=>"\1\0\0\0\0\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'footer'=>['c'=>"\3\20\4",'ac'=>"\1",'dd'=>"\0\0\4",'b'=>1,'cp'=>['p']],
		'form'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
		'h1'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'h2'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'h3'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'h4'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'h5'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'h6'=>['c'=>"\103\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'head'=>['c'=>"\0\0\20",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
		'header'=>['c'=>"\3\20\4",'ac'=>"\1",'dd'=>"\0\0\4",'b'=>1,'cp'=>['p']],
		'hgroup'=>['c'=>"\103",'ac'=>"\0\201",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'hr'=>['c'=>"\1\40",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
		'html'=>['c'=>"\0",'ac'=>"\0\0\20",'dd'=>"\0",'nt'=>1,'b'=>1],
		'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
		'img'=>['c'=>"\257\0\40",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'input'=>['c'=>"\217",'c3'=>'@type!="hidden"','c7'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
		'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'label'=>['c'=>"\17\0\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
		'legend'=>['c'=>"\0\0\0\100",'ac'=>"\104",'dd'=>"\0",'b'=>1],
		'li'=>['c'=>"\0\0\0\0\100",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
		'link'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'main'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'map'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
		'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'menu'=>['c'=>"\3\40",'c1'=>'li','ac'=>"\0\240",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'menuitem'=>['c'=>"\0\40",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
		'meta'=>['c'=>"\25",'c0'=>'@itemprop','c2'=>'@itemprop','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
		'object'=>['c'=>"\47\0\0\2",'ac'=>"\0",'dd'=>"\0",'t'=>1],
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
		'option'=>['c'=>"\0\0\2\40",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
		'output'=>['c'=>"\207",'ac'=>"\4",'dd'=>"\0"],
		'p'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'picture'=>['c'=>"\45",'ac'=>"\0\200\40",'dd'=>"\0",'nt'=>1],
		'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
		'progress'=>['c'=>"\7\0\1\4",'ac'=>"\4",'dd'=>"\0\0\0\4"],
		'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'rb'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
		'rp'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1,'cp'=>['rp','rt']],
		'rt'=>['c'=>"\0\10",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
		'rtc'=>['c'=>"\0\10",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
		'ruby'=>['c'=>"\7",'ac'=>"\4\10",'dd'=>"\0"],
		's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'script'=>['c'=>"\25\200",'ac'=>"\0",'dd'=>"\0",'to'=>1],
		'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
		'select'=>['c'=>"\217",'ac'=>"\0\200\2",'dd'=>"\0",'nt'=>1],
		'slot'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
		'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'source'=>['c'=>"\0\0\40\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
		'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'style'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
		'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'summary'=>['c'=>"\0\0\0\10",'ac'=>"\104",'dd'=>"\0",'b'=>1],
		'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'table'=>['c'=>"\3\0\0\0\1",'ac'=>"\0\202",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
		'td'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['td','th']],
		'template'=>['c'=>"\25\200\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
		'textarea'=>['c'=>"\217",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
		'th'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\100\4\4",'b'=>1,'cp'=>['td','th']],
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1],
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
		'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
		'tr'=>['c'=>"\0\2\0\0\40",'ac'=>"\0\200\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
		'track'=>['c'=>"\0\0\0\200",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
		'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
		'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
		'video'=>['c'=>"\57\0\10",'c3'=>'@controls','ac'=>"\0\0\0\220",'ac28'=>'not(@src)','dd'=>"\0\0\10",'t'=>1],
		'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
	];

	/**
	* Tell whether opening $child implicitly closes $parent
	*
	* The answer comes from the "cp" list of the child's definition. Elements that are not in
	* the table, or that have no "cp" list, never close anything.
	*
	* @param  DOMElement $child
	* @param  DOMElement $parent
	* @return bool
	*/
	public static function closesParent(DOMElement $child, DOMElement $parent)
	{
		$closedParents = self::$htmlElements[$child->nodeName]['cp'] ?? [];

		return in_array($parent->nodeName, $closedParents, true);
	}

	/**
	* Tell whether text nodes are forbidden inside of given element ("nt" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function disallowsText(DOMElement $element)
	{
		return self::hasProperty($element, 'nt');
	}

	/**
	* Compute the "allowChild" ("ac") bitfield of given element, conditions applied
	*
	* @param  DOMElement $element
	* @return string     Raw bytes
	*/
	public static function getAllowChildBitfield(DOMElement $element)
	{
		return self::getBitfield($element, 'ac');
	}

	/**
	* Compute the "category" ("c") bitfield of given element, conditions applied
	*
	* @param  DOMElement $element
	* @return string     Raw bytes
	*/
	public static function getCategoryBitfield(DOMElement $element)
	{
		return self::getBitfield($element, 'c');
	}

	/**
	* Compute the "denyDescendant" ("dd") bitfield of given element, conditions applied
	*
	* @param  DOMElement $element
	* @return string     Raw bytes
	*/
	public static function getDenyDescendantBitfield(DOMElement $element)
	{
		return self::getBitfield($element, 'dd');
	}

	/**
	* Tell whether given element is flagged as a block element ("b" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isBlock(DOMElement $element)
	{
		return self::hasProperty($element, 'b');
	}

	/**
	* Tell whether given element uses the empty content model ("e" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isEmpty(DOMElement $element)
	{
		return self::hasProperty($element, 'e');
	}

	/**
	* Tell whether given element is a formatting element ("fe" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isFormattingElement(DOMElement $element)
	{
		return self::hasProperty($element, 'fe');
	}

	/**
	* Tell whether given element should only contain text ("to" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isTextOnly(DOMElement $element)
	{
		return self::hasProperty($element, 'to');
	}

	/**
	* Tell whether given element uses the transparent content model ("t" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isTransparent(DOMElement $element)
	{
		return self::hasProperty($element, 't');
	}

	/**
	* Tell whether given element is a void element ("v" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function isVoid(DOMElement $element)
	{
		return self::hasProperty($element, 'v');
	}

	/**
	* Tell whether whitespace is preserved inside of given element ("pre" flag)
	*
	* @param  DOMElement $element
	* @return bool
	*/
	public static function preservesWhitespace(DOMElement $element)
	{
		return self::hasProperty($element, 'pre');
	}

	/**
	* Run an XPath expression against given element and cast the result to a boolean
	*
	* The expression is wrapped in XPath's boolean() function and evaluated with the element
	* as context node, on a DOMXPath instance bound to the element's owner document.
	*
	* @param  string     $query   XPath expression
	* @param  DOMElement $element Context node
	* @return bool
	*/
	protected static function evaluate($query, DOMElement $element)
	{
		$expression = 'boolean(' . $query . ')';

		return (new DOMXPath($element->ownerDocument))->evaluate($expression, $element);
	}

	/**
	* Compute one of the bitfields of given element, taking XPath conditions into account
	*
	* @param  DOMElement $element Context node
	* @param  string     $name    Bitfield name: either 'c', 'ac' or 'dd'
	* @return string              Raw bytes
	*/
	protected static function getBitfield(DOMElement $element, $name)
	{
		$definition = self::getProperties($element);
		$bits       = self::toBin($definition[$name]);
		$bitCount   = strlen($bits);

		// Walk through the bits that are set. Whenever one of them is guarded by an XPath
		// condition (key = bitfield name + bit number) that does not hold, clear it
		for ($pos = 0; $pos < $bitCount; ++$pos)
		{
			if ($bits[$pos] !== '1')
			{
				continue;
			}

			$conditionKey = $name . $pos;
			if (!isset($definition[$conditionKey]))
			{
				continue;
			}

			if (!self::evaluate($definition[$conditionKey], $element))
			{
				$bits[$pos] = '0';
			}
		}

		return self::toRaw($bits);
	}

	/**
	* Look up the definition of given element in self::$htmlElements
	*
	* Elements that are not in the table are treated like a span
	*
	* @param  DOMElement $element
	* @return array
	*/
	protected static function getProperties(DOMElement $element)
	{
		$elName = $element->nodeName;
		if (isset(self::$htmlElements[$elName]))
		{
			return self::$htmlElements[$elName];
		}

		return self::$htmlElements['span'];
	}

	/**
	* Tell whether given flag applies to given element in its current context
	*
	* The flag must be set to a non-empty value. If the definition also contains an XPath
	* condition for that flag (flag name followed by "?") the condition must hold as well.
	*
	* @param  DOMElement $element  Context node
	* @param  string     $propName Property name, see self::$htmlElements
	* @return bool
	*/
	protected static function hasProperty(DOMElement $element, $propName)
	{
		$definition = self::getProperties($element);
		if (empty($definition[$propName]))
		{
			return false;
		}

		$conditionKey = $propName . '?';
		if (!isset($definition[$conditionKey]))
		{
			return true;
		}

		return self::evaluate($definition[$conditionKey], $element);
	}

	/**
	* Expand a raw byte string into a string of 0s and 1s, least significant bit first
	*
	* Each byte yields exactly 8 characters
	*
	* @param  string $raw
	* @return string
	*/
	protected static function toBin($raw)
	{
		$octets = array_map(
			static function ($byte)
			{
				// decbin() is MSB-first and unpadded: pad to 8 digits then flip to LSB-first
				return strrev(str_pad(decbin(ord($byte)), 8, '0', STR_PAD_LEFT));
			},
			str_split($raw, 1)
		);

		return implode('', $octets);
	}

	/**
	* Pack a string of 0s and 1s in LSB order back into raw bytes
	*
	* @param  string $bin
	* @return string
	*/
	protected static function toRaw($bin)
	{
		$raw = '';
		foreach (str_split($bin, 8) as $octet)
		{
			// Each group of 8 digits is LSB-first: flip it before reading it as a binary number
			$raw .= chr(bindec(strrev($octet)));
		}

		return $raw;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Jun 23 12:25:44 2024 | Cross-referenced by PHPXref 0.7.1 |