[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
<?php

/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter;

use DOMDocument;
use DOMXPath;

/**
* Static helpers that inspect or rewrite the XML intermediate representation of a text
*
* The attribute helpers work on the XML string with regular expressions; the tag-removal helpers
* load the XML into a DOMDocument.
*/
abstract class Utils
{
	/**
	* Return every value used in given attribute from given tag
	*
	* Will include duplicate values. Values are returned in document order, with their XML
	* entities decoded.
	*
	* @param  string   $xml      Parsed text
	* @param  string   $tagName  Target tag's name
	* @param  string   $attrName Target attribute's name
	* @return string[]           Attribute values
	*/
	public static function getAttributeValues($xml, $tagName, $attrName)
	{
		$values = [];

		// Cheap substring test that skips the regexp when the tag name cannot possibly be present
		if (strpos($xml, $tagName) !== false)
		{
			// Match the tag name right after "<" and followed by a space, lazily skip to the
			// attribute, then use \K so that the match only contains the attribute's raw value
			$regexp = '((?<=<)' . preg_quote($tagName) . '(?= )[^>]*? ' . preg_quote($attrName) . '="\\K[^"]*+)';
			preg_match_all($regexp, $xml, $matches);
			foreach ($matches[0] as $value)
			{
				$values[] = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
			}
		}

		return $values;
	}

	/**
	* Replace Unicode characters outside the BMP with XML entities
	*
	* Every 4-byte sequence that starts with a byte in the 0xF0..0xF4 range is replaced with a
	* decimal character reference.
	*
	* @param  string $str Original string
	* @return string      String with SMP characters encoded
	*/
	public static function encodeUnicodeSupplementaryCharacters($str)
	{
		return preg_replace_callback(
			'([\\xF0-\\xF4]...)S',
			__CLASS__ . '::encodeUnicodeSupplementaryCharactersCallback',
			$str
		);
	}

	/**
	* Strip the formatting of an intermediate representation and return plain text
	*
	* This will remove start tags and end tags (the "s" and "e" elements) but will keep the text
	* content of everything else
	*
	* @param  string $xml Intermediate representation
	* @return string      Plain text
	*/
	public static function removeFormatting($xml)
	{
		$dom   = self::loadXML($xml);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//e | //s') as $node)
		{
			$node->parentNode->removeChild($node);
		}

		return $dom->documentElement->textContent;
	}

	/**
	* Remove all tags at given nesting level
	*
	* Every element of given name that has at least $nestingLevel ancestors of the same name is
	* removed from the document, along with its content.
	*
	* @param  string  $xml          Intermediate representation
	* @param  string  $tagName      Tag's name (case-sensitive)
	* @param  integer $nestingLevel Minimum nesting level
	* @return string                Updated intermediate representation
	*/
	public static function removeTag($xml, $tagName, $nestingLevel = 0)
	{
		// Return the input untouched, without loading it, if the tag name does not appear in it
		if (strpos($xml, $tagName) === false)
		{
			return $xml;
		}

		$dom   = self::loadXML($xml);
		$xpath = new DOMXPath($dom);
		$query = '//' . $tagName . '[count(ancestor::' . $tagName . ') >= ' . $nestingLevel . ']';
		$nodes = $xpath->query($query);
		foreach ($nodes as $node)
		{
			$node->parentNode->removeChild($node);
		}

		return self::saveXML($dom);
	}

	/**
	* Replace the attributes of all tags of given name in given XML
	*
	* The attributes returned by the callback are serialized sorted by name. Attribute values
	* round-trip: entities are decoded before the callback is called and re-encoded afterwards.
	*
	* @param  string   $xml      Original XML
	* @param  string   $tagName  Target tag's name
	* @param  callable $callback Callback used to process attributes. Receives the old attributes
	*                            as an array, should return the new attributes as an array
	* @return string             Modified XML
	*/
	public static function replaceAttributes($xml, $tagName, callable $callback)
	{
		if (strpos($xml, $tagName) === false)
		{
			return $xml;
		}

		// \K discards the tag name from the match so that only the remainder of the start tag
		// (its attributes, and the trailing slash of a self-closing tag) gets replaced
		return preg_replace_callback(
			'((?<=<)' . preg_quote($tagName) . '(?=[ />])\\K[^>]*+)',
			function ($m) use ($callback)
			{
				$str = self::serializeAttributes($callback(self::parseAttributes($m[0])));

				// Preserve the slash of a self-closing tag
				if (substr($m[0], -1) === '/')
				{
					$str .= '/';
				}

				return $str;
			},
			$xml
		);
	}

	/**
	* Encode given Unicode character into an XML entity
	*
	* @param  string[] $m Array of captures, $m[0] being a 4-byte UTF-8 sequence
	* @return string      Encoded character, as a decimal character reference
	*/
	protected static function encodeUnicodeSupplementaryCharactersCallback(array $m)
	{
		$utf8 = $m[0];

		// 0x3C82080 is (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80, the sum of the marker
		// bits carried by the lead byte and the three continuation bytes
		$cp = (ord($utf8[0]) << 18) + (ord($utf8[1]) << 12) + (ord($utf8[2]) << 6) + ord($utf8[3]) - 0x3C82080;

		return '&#' . $cp . ';';
	}

	/**
	* Create and return a new DOMDocument loaded with given XML
	*
	* @param  string      $xml Source XML
	* @return DOMDocument
	*/
	protected static function loadXML($xml)
	{
		// Activate small nodes allocation and relax LibXML's hardcoded limits if applicable
		$flags = (LIBXML_VERSION >= 20700) ? LIBXML_COMPACT | LIBXML_PARSEHUGE : 0;

		$dom = new DOMDocument;
		$dom->loadXML($xml, $flags);

		return $dom;
	}

	/**
	* Parse the attributes contained in given XML
	*
	* Only attributes whose value is enclosed in double quotes are recognized.
	*
	* @param  string $xml XML string, normally a start tag
	* @return array       Associative array of attribute values, with entities decoded
	*/
	protected static function parseAttributes($xml)
	{
		$attributes = [];
		if (strpos($xml, '="') !== false)
		{
			preg_match_all('(([^ =]++)="([^"]*))', $xml, $matches);
			foreach ($matches[1] as $i => $attrName)
			{
				$attributes[$attrName] = html_entity_decode($matches[2][$i], ENT_QUOTES, 'UTF-8');
			}
		}

		return $attributes;
	}

	/**
	* Serialize given DOMDocument
	*
	* Only the document element is serialized, and characters outside the BMP are encoded as
	* character references.
	*
	* @param  DOMDocument $dom
	* @return string
	*/
	protected static function saveXML(DOMDocument $dom)
	{
		return self::encodeUnicodeSupplementaryCharacters($dom->saveXML($dom->documentElement));
	}

	/**
	* Serialize an array of attribute values
	*
	* @param  array  $attributes Associative array of attribute values
	* @return string             Attributes, sorted by name and serialized to XML
	*/
	protected static function serializeAttributes(array $attributes)
	{
		$xml = '';
		ksort($attributes);
		foreach ($attributes as $attrName => $attrValue)
		{
			$xml .= ' ' . htmlspecialchars($attrName, ENT_QUOTES) . '="' . htmlspecialchars($attrValue, ENT_COMPAT) . '"';
		}

		// Normalize control characters to what the parser would normally produce
		$xml = preg_replace('/\\r\\n?/', "\n", $xml);
		$xml = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $xml);

		// Newlines must be written as a character reference. A literal newline inside an
		// attribute value is normalized to a space by XML parsers, and replacing it with a
		// space here would lose it, whereas parseAttributes() decodes &#10; back to a newline
		$xml = str_replace("\n", '&#10;', $xml);

		return self::encodeUnicodeSupplementaryCharacters($xml);
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |