[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
<?php

/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Plugins\Preg;

use DOMAttr;
use DOMText;
use DOMXPath;
use Exception;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Helpers\NodeLocator;
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
use s9e\TextFormatter\Configurator\Helpers\TemplateLoader;
use s9e\TextFormatter\Configurator\Helpers\TemplateModifier;
use s9e\TextFormatter\Configurator\Items\Regexp;
use s9e\TextFormatter\Configurator\Items\Tag;
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
use s9e\TextFormatter\Configurator\Validators\TagName;
use s9e\TextFormatter\Plugins\ConfiguratorBase;

/**
* Configurator for pattern-based matches and replacements
*
* Each call to match() or replace() records a [tagName, regexp, passthroughIdx] triplet in
* $collection. replace() additionally builds a Tag whose attributes are derived from the
* regexp's captures and whose template is derived from the preg-style replacement.
*/
class Configurator extends ConfiguratorBase
{
	/**
	* @var array[] Captures from current regexp. Index 0 is a placeholder for the whole match,
	*              subsequent entries hold the keys "pos", "name" and "expr"
	*/
	protected $captures;

	/**
	* @var array[] List of [tagName, regexp, passthroughIdx]
	*/
	protected $collection = [];

	/**
	* @var string Delimiter used in current regexp
	*/
	protected $delimiter;

	/**
	* @var string Non-D modifiers used in current regexp
	*/
	protected $modifiers;

	/**
	* @var array References used in current template, grouped under the keys "anywhere",
	*            "asUrl" and "inText"
	*/
	protected $references;

	/**
	* @var string Regexp used to find references in the templates. We check that the reference is
	*             not preceded with an odd number of backslashes
	*/
	protected $referencesRegexp = '((?<!\\\\)(?:\\\\\\\\)*\\K(?:[$\\\\]\\d+|\\$\\{\\d+\\}))S';

	/**
	* {@inheritdoc}
	*/
	public function asConfig()
	{
		if (!count($this->collection))
		{
			// Nothing was configured: return no value at all rather than an empty config
			return;
		}

		$pregs = [];
		foreach ($this->collection as list($tagName, $regexp, $passthroughIdx))
		{
			$captures = RegexpParser::getCaptureNames($regexp);
			$pregs[]  = [$tagName, new Regexp($regexp, true), $passthroughIdx, $captures];
		}

		return ['generics' => $pregs];
	}

	/**
	* {@inheritdoc}
	*/
	public function getJSHints()
	{
		$hasPassthrough = false;
		foreach ($this->collection as $entry)
		{
			// Entries are [tagName, regexp, passthroughIdx]; only the last element matters here
			if ($entry[2])
			{
				$hasPassthrough = true;
				break;
			}
		}

		return ['PREG_HAS_PASSTHROUGH' => $hasPassthrough];
	}

	/**
	* Configure a pattern-based match
	*
	* If the regexp contains one or more catch-all captures (e.g. ".*?") the index of the last
	* one is recorded as the passthrough index, otherwise 0 is recorded.
	*
	* @param  string $regexp  Regexp to be used by the parser
	* @param  string $tagName Name of the tag that holds the matched text
	* @return void
	*/
	public function match($regexp, $tagName)
	{
		$tagName        = TagName::normalize($tagName);
		$passthroughIdx = 0;
		$this->parseRegexp($regexp);
		foreach ($this->captures as $i => $capture)
		{
			if (!$this->isCatchAll((string) $capture['expr']))
			{
				continue;
			}
			$passthroughIdx = $i;
		}

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];
	}

	/**
	* Configure a pattern-based replacement
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $template Template to be used for rendering
	* @param  string $tagName  Name of the tag to create. A name based on the regexp is
	*                          automatically generated if none is provided
	* @return Tag              The tag created to represent this replacement
	*/
	public function replace($regexp, $template, $tagName = null)
	{
		if (!isset($tagName))
		{
			$tagName = 'PREG_' . strtoupper(dechex(crc32($regexp)));
		}

		// Normalize the name the same way match() does so that the name recorded in the
		// collection does not depend on how the caller spelled it.
		// NOTE(review): presumably the tag collection applies the same normalization when the
		// tag is added -- confirm against TagCollection
		$tagName = TagName::normalize($tagName);

		$this->parseRegexp($regexp);
		$this->parseTemplate($template);

		$passthroughIdx = $this->getPassthroughCapture();
		if ($passthroughIdx)
		{
			$this->captures[$passthroughIdx]['passthrough'] = true;
		}

		$regexp   = $this->fixUnnamedCaptures($regexp);
		$template = $this->convertTemplate($template, $passthroughIdx);

		// Create the tag first so that a failure (e.g. an exception thrown while checking or
		// registering the tag) does not leave an orphaned entry in the collection
		$tag = $this->createTag($tagName, $template);

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];

		return $tag;
	}

	/**
	* Add given attribute to given tag based on parsed captures
	*
	* The attribute receives a #regexp filter built from the expression(s) of every capture
	* that bears its name, anchored at both ends, plus a #url filter if any of those captures
	* is referenced as a URL in the current template.
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @return void
	*/
	protected function addAttribute(Tag $tag, $attrName)
	{
		$isUrl = false;
		$exprs = [];
		foreach ($this->captures as $key => $capture)
		{
			if ($capture['name'] !== $attrName)
			{
				continue;
			}
			$exprs[] = $capture['expr'];
			if (isset($this->references['asUrl'][$key]))
			{
				$isUrl = true;
			}
		}

		// array_unique() preserves the first occurrence of each value, so index 0 is always set
		$exprs = array_unique($exprs);

		$regexp  = $this->delimiter . '^';
		$regexp .= (count($exprs) === 1) ? $exprs[0] : '(?:' . implode('|', $exprs) . ')';
		$regexp .= '$' . $this->delimiter . 'D' . $this->modifiers;

		$attribute = $tag->attributes->add($attrName);

		$filter = $this->configurator->attributeFilters['#regexp'];
		$filter->setRegexp($regexp);
		$attribute->filterChain[] = $filter;

		if ($isUrl)
		{
			$filter = $this->configurator->attributeFilters['#url'];
			$attribute->filterChain[] = $filter;
		}
	}

	/**
	* Convert a preg-style replacement to a template
	*
	* @param  string  $template       Original template
	* @param  integer $passthroughIdx Index of the passthrough capture
	* @return string                  Modified template
	*/
	protected function convertTemplate($template, $passthroughIdx)
	{
		// Replace numeric references in the template with the value of the corresponding attribute
		// values or passthrough
		$template = TemplateModifier::replaceTokens(
			$template,
			$this->referencesRegexp,
			function ($m, $node) use ($passthroughIdx)
			{
				$key = (int) trim($m[0], '\\${}');
				if ($key === 0)
				{
					// $0 copies the whole textContent
					return ['expression', '.'];
				}
				if ($key === $passthroughIdx && $node instanceof DOMText)
				{
					// Passthrough capture, does not include start/end tags
					return ['passthrough'];
				}
				if (isset($this->captures[$key]['name']))
				{
					// Normal capture, replaced by the equivalent expression
					return ['expression', '@' . $this->captures[$key]['name']];
				}

				// Non-existent captures are simply ignored, similarly to preg_replace()
				return ['literal', ''];
			}
		);

		// Unescape backslashes and special characters in the template
		$template = TemplateModifier::replaceTokens(
			$template,
			'(\\\\+[0-9${\\\\])',
			function ($m)
			{
				return ['literal', stripslashes($m[0])];
			}
		);

		return $template;
	}

	/**
	* Create the tag that matches current regexp
	*
	* One attribute is created per distinct capture name. The tag's template is normalized and
	* checked before the tag is added to the configurator's tag collection.
	*
	* @param  string $tagName
	* @param  string $template
	* @return Tag
	*/
	protected function createTag($tagName, $template)
	{
		$tag = new Tag;
		foreach ($this->captures as $capture)
		{
			if (!isset($capture['name']))
			{
				continue;
			}

			$attrName = $capture['name'];
			if (isset($tag->attributes[$attrName]))
			{
				// Several captures can share a name, the attribute is only created once
				continue;
			}

			$this->addAttribute($tag, $attrName);
		}
		$tag->template = $template;

		// Normalize the tag's template
		$this->configurator->templateNormalizer->normalizeTag($tag);

		// Check the safeness of this tag
		$this->configurator->templateChecker->checkTag($tag);

		return $this->configurator->tags->add($tagName, $tag);
	}

	/**
	* Give a name to unnamed captures that are referenced in current replacement
	*
	* @param  string $regexp Original regexp
	* @return string         Modified regexp
	*/
	protected function fixUnnamedCaptures($regexp)
	{
		$keys = [];
		foreach ($this->references['anywhere'] as $key)
		{
			$capture = $this->captures[$key];
			if (!$key || isset($capture['name']))
			{
				continue;
			}
			// Give the capture a name if it's used as URL or it's not a passthrough
			if (isset($this->references['asUrl'][$key]) || !isset($capture['passthrough']))
			{
				$keys[] = $key;
			}
		}

		// Alter the original regexp to inject the subpatterns' names. The position is equal to the
		// subpattern's position plus 2, to account for the delimiter at the start of the regexp and
		// the opening parenthesis of the subpattern. Also, we need to process them in reverse order
		// so that replacements don't change the position of subsequent subpatterns
		rsort($keys);
		foreach ($keys as $key)
		{
			$name   = '_' . $key;
			$pos    = $this->captures[$key]['pos'];
			$regexp = substr_replace($regexp, "?'" . $name . "'", 2 + $pos, 0);
			$this->captures[$key]['name'] = $name;
		}

		return $regexp;
	}

	/**
	* Get the index of the capture used for passthrough in current replacement
	*
	* @return integer Index of the only catch-all capture referenced in a text node, or 0 if
	*                 there is none or more than one
	*/
	protected function getPassthroughCapture()
	{
		$passthrough = 0;
		foreach ($this->references['inText'] as $key)
		{
			if (!$this->isCatchAll((string) $this->captures[$key]['expr']))
			{
				// Ignore if it's not a catch-all expression such as .*?
				continue;
			}
			if ($passthrough)
			{
				// Abort if there's more than 1 possible passthrough
				$passthrough = 0;
				break;
			}
			$passthrough = (int) $key;
		}

		return $passthrough;
	}

	/**
	* Parse a regexp and return its info
	*
	* @param  string $regexp
	* @return array
	*
	* @throws InvalidArgumentException if the regexp cannot be executed by PCRE
	*/
	protected function getRegexpInfo($regexp)
	{
		// Run the regexp against an empty string to test its validity. Errors are silenced
		// because the return value is what's used to detect an invalid regexp
		if (@preg_match_all($regexp, '') === false)
		{
			throw new InvalidArgumentException('Invalid regexp');
		}

		return RegexpParser::parse($regexp);
	}

	/**
	* Test whether given expression is a catch-all expression such as .*?
	*
	* @param  string $expr Subpattern
	* @return bool
	*/
	protected function isCatchAll($expr)
	{
		return (bool) preg_match('(^\\.[*+]\\??$)D', $expr);
	}

	/**
	* Parse given regexp and store its information
	*
	* Resets $captures, $delimiter and $modifiers based on the given regexp.
	*
	* @param  string $regexp
	* @return void
	*/
	protected function parseRegexp($regexp)
	{
		// Index 0 is reserved so that the captures' indexes match the references' numbers
		$this->captures  = [['name' => null, 'expr' => null]];
		$regexpInfo      = $this->getRegexpInfo($regexp);
		$this->delimiter = $regexpInfo['delimiter'];
		$this->modifiers = str_replace('D', '', $regexpInfo['modifiers']);
		foreach ($regexpInfo['tokens'] as $token)
		{
			if ($token['type'] !== 'capturingSubpatternStart')
			{
				continue;
			}
			$this->captures[] = [
				'pos'  => $token['pos'],
				'name' => $token['name'] ?? null,
				'expr' => $token['content']
			];
		}
	}

	/**
	* Parse given template and store the references it contains
	*
	* References are stored in three lists: those found anywhere in the template, those found
	* in a text node, and those found at the start of an attribute identified as a URL.
	*
	* @param  string $template
	* @return void
	*/
	protected function parseTemplate($template)
	{
		$this->references = [
			'anywhere' => [],
			'asUrl'    => [],
			'inText'   => []
		];

		preg_match_all($this->referencesRegexp, $template, $matches);
		foreach ($matches[0] as $match)
		{
			$key = trim($match, '\\${}');
			$this->references['anywhere'][$key] = $key;
		}

		$dom   = TemplateLoader::load($template);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//text()') as $node)
		{
			preg_match_all($this->referencesRegexp, $node->textContent, $matches);
			foreach ($matches[0] as $match)
			{
				$key = trim($match, '\\${}');
				$this->references['inText'][$key] = $key;
			}
		}

		foreach (NodeLocator::getURLNodes($dom) as $node)
		{
			// We only bother with literal attributes that start with a capture
			if ($node instanceof DOMAttr
			 && preg_match('(^(?:[$\\\\]\\d+|\\$\\{\\d+\\}))', trim($node->value), $m))
			{
				$key = trim($m[0], '\\${}');
				$this->references['asUrl'][$key] = $key;
			}
		}

		$this->removeUnknownReferences();
	}

	/**
	* Remove references that do not correspond to an existing capture
	*
	* @return void
	*/
	protected function removeUnknownReferences()
	{
		foreach ($this->references as &$references)
		{
			$references = array_intersect_key($references, $this->captures);
		}

		// Break the reference so that the variable doesn't alias the last list
		unset($references);
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |