[ Index ] |
PHP Cross Reference of phpBB-3.3.14-deutsch |
[Summary view] [Print] [Text view]
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Intl\Idn;

use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;

/**
 * Userland implementation of idn_to_ascii() / idn_to_utf8() following the UTS #46 processing
 * steps (map, normalize, split into labels, convert/validate each label) together with the
 * Punycode encoder/decoder those steps rely on.
 *
 * @see https://www.unicode.org/reports/tr46/
 *
 * @internal
 */
final class Idn
{
    // Error bit flags; they are OR-ed into Info::$errors and reported through $idna_info['errors'].
    public const ERROR_EMPTY_LABEL = 1;
    public const ERROR_LABEL_TOO_LONG = 2;
    public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
    public const ERROR_LEADING_HYPHEN = 8;
    public const ERROR_TRAILING_HYPHEN = 0x10;
    public const ERROR_HYPHEN_3_4 = 0x20;
    public const ERROR_LEADING_COMBINING_MARK = 0x40;
    public const ERROR_DISALLOWED = 0x80;
    public const ERROR_PUNYCODE = 0x100;
    public const ERROR_LABEL_HAS_DOT = 0x200;
    public const ERROR_INVALID_ACE_LABEL = 0x400;
    public const ERROR_BIDI = 0x800;
    public const ERROR_CONTEXTJ = 0x1000;
    public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
    public const ERROR_CONTEXTO_DIGITS = 0x4000;

    // Values accepted for the $variant argument.
    public const INTL_IDNA_VARIANT_2003 = 0;
    public const INTL_IDNA_VARIANT_UTS46 = 1;

    // Bit flags accepted for the $options argument.
    public const IDNA_DEFAULT = 0;
    public const IDNA_ALLOW_UNASSIGNED = 1;
    public const IDNA_USE_STD3_RULES = 2;
    public const IDNA_CHECK_BIDI = 4;
    public const IDNA_CHECK_CONTEXTJ = 8;
    public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
    public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;

    // Length limits (in bytes) enforced by validateDomainAndLabelLength().
    public const MAX_DOMAIN_SIZE = 253;
    public const MAX_LABEL_SIZE = 63;

    // Parameters used by punycodeEncode(), punycodeDecode() and adaptBias().
    public const BASE = 36;
    public const TMIN = 1;
    public const TMAX = 26;
    public const SKEW = 38;
    public const DAMP = 700;
    public const INITIAL_BIAS = 72;
    public const INITIAL_N = 128;
    public const DELIMITER = '-';
    public const MAX_INT = 2147483647;

    /**
     * Contains the numeric value of a basic code point (for use in representing integers) in the
     * range 0 to BASE-1, or -1 if the byte does not represent a value.
     *
     * The table is indexed by byte value (0-255): 'A'-'Z' and 'a'-'z' map to 0-25 and '0'-'9'
     * map to 26-35.
     *
     * @var array<int, int>
     */
    private static $basicToDigit = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    ];

    /**
     * Lazily loaded from Resources/unidata/virama.php; keyed by code point.
     *
     * @var array<int, int>
     */
    private static $virama;

    /**
     * @var array<int, string>
     */
    private static $mapped;

    /**
     * @var array<int, bool>
     */
    private static $ignored;

    /**
     * @var array<int, string>
     */
    private static $deviation;

    /**
     * @var array<int, bool>
     */
    private static $disallowed;

    /**
     * @var array<int, string>
     */
    private static $disallowed_STD3_mapped;

    /**
     * @var array<int, bool>
     */
    private static $disallowed_STD3_valid;

    /**
     * Whether the mapping tables above have been loaded by lookupCodePointStatus().
     *
     * @var bool
     */
    private static $mappingTableLoaded = false;

    /**
     * Converts a domain name to its ASCII form; labels containing non-ASCII code points are
     * Punycode-encoded and prefixed with "xn--".
     *
     * @see https://www.unicode.org/reports/tr46/#ToASCII
     *
     * @param string $domainName
     * @param int    $options    Bitmask of the IDNA_* constants
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Receives the keys "result", "isTransitionalDifferent" and "errors"
     *
     * @return string|false The converted domain, or false when any error bit was recorded
     */
    public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
        }

        // From here on $options holds the processing flags derived from the bitmask.
        $options = [
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
            'VerifyDnsLength' => true,
        ];
        $info = new Info();
        $labels = self::process((string) $domainName, $options, $info);

        foreach ($labels as $i => $label) {
            // Only convert labels to punycode that contain non-ASCII code points
            if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
                try {
                    $label = 'xn--'.self::punycodeEncode($label);
                } catch (\Exception $e) {
                    // The label is left unencoded and the failure is reported via the error mask.
                    $info->errors |= self::ERROR_PUNYCODE;
                }

                $labels[$i] = $label;
            }
        }

        if ($options['VerifyDnsLength']) {
            self::validateDomainAndLabelLength($labels, $info);
        }

        $idna_info = [
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        ];

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Converts a domain name to its Unicode (UTF-8) form; "xn--" labels are Punycode-decoded.
     *
     * @see https://www.unicode.org/reports/tr46/#ToUnicode
     *
     * @param string $domainName
     * @param int    $options    Bitmask of the IDNA_* constants
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Receives the keys "result", "isTransitionalDifferent" and "errors"
     *
     * @return string|false The converted domain, or false when any error bit was recorded
     */
    public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
        }

        $info = new Info();
        // No "VerifyDnsLength" key is passed here; process() treats its absence as ToUnicode.
        $labels = self::process((string) $domainName, [
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
        ], $info);
        $idna_info = [
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        ];

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Checks every U+200C (ZWNJ) / U+200D (ZWJ) in the label against the ContextJ rules.
     *
     * @param array<int, int> $codePoints Code points of $label, as returned by utf8Decode()
     * @param string          $label
     *
     * @return bool
     */
    private static function isValidContextJ(array $codePoints, $label)
    {
        if (!isset(self::$virama)) {
            self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
        }

        // Byte offset in $label from which the next ZWNJ context search starts.
        $offset = 0;

        foreach ($codePoints as $i => $codePoint) {
            if (0x200C !== $codePoint && 0x200D !== $codePoint) {
                continue;
            }

            // A joiner at the very start of the label has no preceding code point to attach to.
            if (!isset($codePoints[$i - 1])) {
                return false;
            }

            // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
            if (isset(self::$virama[$codePoints[$i - 1]])) {
                continue;
            }

            // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
            // True;
            // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
            if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
                // The search is unanchored, so the match may begin after $offset. Continue from
                // the real end of capture group 1 (its captured byte offset plus its byte length)
                // rather than adding the length to the previous offset, which could point before
                // the match end or into the middle of a multi-byte character.
                $offset = $matches[1][1] + \strlen($matches[1][0]);

                continue;
            }

            return false;
        }

        return true;
    }

    /**
     * Applies the mapping step to every code point of the input and returns the mapped string.
     *
     * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
     *
     * @param string              $input
     * @param array<string, bool> $options
     *
     * @return string
     */
    private static function mapCodePoints($input, array $options, Info $info)
    {
        $str = '';
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
        $transitional = $options['Transitional_Processing'];

        foreach (self::utf8Decode($input) as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);

            switch ($data['status']) {
                case 'disallowed':
                    // Record the error but keep the code point in the output, like a valid one.
                    $info->errors |= self::ERROR_DISALLOWED;

                    // no break.

                case 'valid':
                    $str .= mb_chr($codePoint, 'utf-8');

                    break;

                case 'ignored':
                    // Do nothing.
                    break;

                case 'mapped':
                    $str .= $data['mapping'];

                    break;

                case 'deviation':
                    $info->transitionalDifferent = true;
                    $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));

                    break;
            }
        }

        return $str;
    }

    /**
     * Runs the shared processing pipeline: map, normalize to NFC, split into labels, then
     * decode (for "xn--" labels) and validate each label.
     *
     * @see https://www.unicode.org/reports/tr46/#Processing
     *
     * @param string              $domain
     * @param array<string, bool> $options
     *
     * @return array<int, string> The processed labels; errors are recorded on $info
     */
    private static function process($domain, array $options, Info $info)
    {
        // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
        // we need to respect the VerifyDnsLength option.
        $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];

        if ($checkForEmptyLabels && '' === $domain) {
            $info->errors |= self::ERROR_EMPTY_LABEL;

            return [$domain];
        }

        // Step 1. Map each code point in the domain name string
        $domain = self::mapCodePoints($domain, $options, $info);

        // Step 2. Normalize the domain name string to Unicode Normalization Form C.
        if (!\Normalizer::isNormalized($domain, \Normalizer::FORM_C)) {
            $domain = \Normalizer::normalize($domain, \Normalizer::FORM_C);
        }

        // Step 3. Break the string into labels at U+002E (.) FULL STOP.
        $labels = explode('.', $domain);
        $lastLabelIndex = \count($labels) - 1;

        // Step 4. Convert and validate each label in the domain name string.
        foreach ($labels as $i => $label) {
            $validationOptions = $options;

            if ('xn--' === substr($label, 0, 4)) {
                try {
                    $label = self::punycodeDecode(substr($label, 4));
                } catch (\Exception $e) {
                    $info->errors |= self::ERROR_PUNYCODE;

                    // The undecodable label is kept as-is and not validated further.
                    continue;
                }

                // Decoded labels are always validated with non-transitional processing.
                $validationOptions['Transitional_Processing'] = false;
                $labels[$i] = $label;
            }

            // Only the last label of a multi-label domain may be empty (the root label).
            self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
        }

        if ($info->bidiDomain && !$info->validBidiDomain) {
            $info->errors |= self::ERROR_BIDI;
        }

        // Any input domain name string that does not record an error has been successfully
        // processed according to this specification. Conversely, if an input domain_name string
        // causes an error, then the processing of the input domain_name string fails. Determining
        // what to do with error input is up to the caller, and not in the scope of this document.
        return $labels;
    }

    /**
     * Checks a label against the Bidi rule and records the outcome on $info
     * ($info->bidiDomain / $info->validBidiDomain); it does not set error bits itself.
     *
     * @see https://tools.ietf.org/html/rfc5893#section-2
     *
     * @param string $label
     */
    private static function validateBidiLabel($label, Info $info)
    {
        if (1 === preg_match(Regex::RTL_LABEL, $label)) {
            $info->bidiDomain = true;

            // Step 1. The first character must be a character with Bidi property L, R, or AL.
            // If it has the R or AL property, it is an RTL label
            if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
            // CS, ET, ON, BN, or NSM are allowed.
            if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 3. In an RTL label, the end of the label must be a character with Bidi property
            // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
            if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
            if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            return;
        }

        // We are a LTR label
        // Step 1. The first character must be a character with Bidi property L, R, or AL.
        // If it has the L property, it is an LTR label.
        if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
        // ES, CS, ET, ON, BN, or NSM are allowed.
        if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or
        // EN, followed by zero or more characters with Bidi property NSM.
        if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
            $info->validBidiDomain = false;

            return;
        }
    }

    /**
     * Records ERROR_LABEL_TOO_LONG / ERROR_DOMAIN_NAME_TOO_LONG when a label or the whole
     * domain exceeds MAX_LABEL_SIZE / MAX_DOMAIN_SIZE bytes.
     *
     * @param array<int, string> $labels
     */
    private static function validateDomainAndLabelLength(array $labels, Info $info)
    {
        $maxDomainSize = self::MAX_DOMAIN_SIZE;
        $length = \count($labels);

        // Number of "." delimiters.
        $domainLength = $length - 1;

        // If the last label is empty and it is not the first label, then it is the root label.
        // Increase the max size by 1, making it 254, to account for the root label's "."
        // delimiter. This also means we don't need to check the last label's length for being too
        // long.
        if ($length > 1 && '' === $labels[$length - 1]) {
            ++$maxDomainSize;
            --$length;
        }

        for ($i = 0; $i < $length; ++$i) {
            $bytes = \strlen($labels[$i]);
            $domainLength += $bytes;

            if ($bytes > self::MAX_LABEL_SIZE) {
                $info->errors |= self::ERROR_LABEL_TOO_LONG;
            }
        }

        if ($domainLength > $maxDomainSize) {
            $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
        }
    }

    /**
     * Validates a single label and records every violated criterion as an error bit on $info.
     *
     * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
     *
     * @param string              $label
     * @param array<string, bool> $options
     * @param bool                $canBeEmpty Whether an empty label is acceptable at this position
     */
    private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
    {
        if ('' === $label) {
            if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
                $info->errors |= self::ERROR_EMPTY_LABEL;
            }

            return;
        }

        // Step 1. The label must be in Unicode Normalization Form C.
        if (!\Normalizer::isNormalized($label, \Normalizer::FORM_C)) {
            $info->errors |= self::ERROR_INVALID_ACE_LABEL;
        }

        $codePoints = self::utf8Decode($label);

        if ($options['CheckHyphens']) {
            // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
            // in both the third and fourth positions.
            if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
                $info->errors |= self::ERROR_HYPHEN_3_4;
            }

            // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
            // HYPHEN-MINUS character.
            if ('-' === substr($label, 0, 1)) {
                $info->errors |= self::ERROR_LEADING_HYPHEN;
            }

            if ('-' === substr($label, -1, 1)) {
                $info->errors |= self::ERROR_TRAILING_HYPHEN;
            }
        }

        // Step 4. The label must not contain a U+002E (.) FULL STOP.
        if (false !== strpos($label, '.')) {
            $info->errors |= self::ERROR_LABEL_HAS_DOT;
        }

        // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
        if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
            $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
        }

        // Step 6. Each code point in the label must only have certain status values according to
        // Section 5, IDNA Mapping Table:
        $transitional = $options['Transitional_Processing'];
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];

        foreach ($codePoints as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
            $status = $data['status'];

            if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
                continue;
            }

            $info->errors |= self::ERROR_DISALLOWED;

            // One offending code point is enough; the error bit carries no further detail.
            break;
        }

        // Step 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in
        // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
        // [IDNA2008].
        if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
            $info->errors |= self::ERROR_CONTEXTJ;
        }

        // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must
        // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
        if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
            self::validateBidiLabel($label, $info);
        }
    }

    /**
     * Decodes a Punycode string (without the "xn--" prefix) into UTF-8.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.2
     *
     * @param string $input
     *
     * @return string
     *
     * @throws \Exception When the input is malformed, overflows, or decodes to a value that is
     *                    not a Unicode scalar value
     */
    private static function punycodeDecode($input)
    {
        $n = self::INITIAL_N;
        $out = 0;
        $i = 0;
        $bias = self::INITIAL_BIAS;
        $lastDelimIndex = strrpos($input, self::DELIMITER);
        // Number of basic (literal) code points: everything before the last delimiter.
        $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
        $inputLength = \strlen($input);
        $output = [];
        $bytes = array_map('ord', str_split($input));

        for ($j = 0; $j < $b; ++$j) {
            if ($bytes[$j] > 0x7F) {
                throw new \Exception('Invalid input');
            }

            $output[$out++] = $input[$j];
        }

        // Skip over the delimiter itself.
        if ($b > 0) {
            ++$b;
        }

        for ($in = $b; $in < $inputLength; ++$out) {
            $oldi = $i;
            $w = 1;

            // Decode one variable-length integer into $i.
            for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                if ($in >= $inputLength) {
                    throw new \Exception('Invalid input');
                }

                $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];

                if ($digit < 0) {
                    throw new \Exception('Invalid input');
                }

                if ($digit > intdiv(self::MAX_INT - $i, $w)) {
                    throw new \Exception('Integer overflow');
                }

                $i += $digit * $w;

                if ($k <= $bias) {
                    $t = self::TMIN;
                } elseif ($k >= $bias + self::TMAX) {
                    $t = self::TMAX;
                } else {
                    $t = $k - $bias;
                }

                if ($digit < $t) {
                    break;
                }

                $baseMinusT = self::BASE - $t;

                if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
                    throw new \Exception('Integer overflow');
                }

                $w *= $baseMinusT;
            }

            $outPlusOne = $out + 1;
            $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);

            if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
                throw new \Exception('Integer overflow');
            }

            $n += intdiv($i, $outPlusOne);

            // $n can legally reach MAX_INT above, far beyond the Unicode range. mb_chr() returns
            // false for such values (and for surrogates), which implode() would silently turn
            // into an empty string, so the bogus character would vanish without any error.
            if ($n > 0x10FFFF || ($n >= 0xD800 && $n <= 0xDFFF)) {
                throw new \Exception('Invalid input');
            }

            $i %= $outPlusOne;
            // Insert the decoded character at position $i, then advance the insertion point.
            array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
        }

        return implode('', $output);
    }

    /**
     * Encodes a UTF-8 string as Punycode (without the "xn--" prefix).
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.3
     *
     * @param string $input
     *
     * @return string
     *
     * @throws \Exception On integer overflow
     */
    private static function punycodeEncode($input)
    {
        $n = self::INITIAL_N;
        $delta = 0;
        $out = 0;
        $bias = self::INITIAL_BIAS;
        $inputLength = 0;
        $output = '';
        $iter = self::utf8Decode($input);

        // Copy the basic (ASCII) code points to the output verbatim and count all code points.
        foreach ($iter as $codePoint) {
            ++$inputLength;

            if ($codePoint < 0x80) {
                $output .= \chr($codePoint);
                ++$out;
            }
        }

        // $h: number of code points handled so far; $b: number of basic code points.
        $h = $out;
        $b = $out;

        if ($b > 0) {
            $output .= self::DELIMITER;
            ++$out;
        }

        while ($h < $inputLength) {
            // Find the smallest code point >= $n that is still to be encoded.
            $m = self::MAX_INT;

            foreach ($iter as $codePoint) {
                if ($codePoint >= $n && $codePoint < $m) {
                    $m = $codePoint;
                }
            }

            if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
                throw new \Exception('Integer overflow');
            }

            $delta += ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($iter as $codePoint) {
                if ($codePoint < $n && 0 === ++$delta) {
                    throw new \Exception('Integer overflow');
                }

                if ($codePoint === $n) {
                    // Emit $delta as a variable-length integer.
                    $q = $delta;

                    for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                        if ($k <= $bias) {
                            $t = self::TMIN;
                        } elseif ($k >= $bias + self::TMAX) {
                            $t = self::TMAX;
                        } else {
                            $t = $k - $bias;
                        }

                        if ($q < $t) {
                            break;
                        }

                        $qMinusT = $q - $t;
                        $baseMinusT = self::BASE - $t;
                        $output .= self::encodeDigit($t + $qMinusT % $baseMinusT, false);
                        ++$out;
                        $q = intdiv($qMinusT, $baseMinusT);
                    }

                    $output .= self::encodeDigit($q, false);
                    ++$out;
                    $bias = self::adaptBias($delta, $h + 1, $h === $b);
                    $delta = 0;
                    ++$h;
                }
            }

            ++$delta;
            ++$n;
        }

        return $output;
    }

    /**
     * Computes the next bias value after a delta has been encoded or decoded.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.1
     *
     * @param int  $delta
     * @param int  $numPoints
     * @param bool $firstTime
     *
     * @return int
     */
    private static function adaptBias($delta, $numPoints, $firstTime)
    {
        // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
        $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
        $delta += intdiv($delta, $numPoints);
        $k = 0;

        while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
            $delta = intdiv($delta, self::BASE - self::TMIN);
            $k += self::BASE;
        }

        return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
    }

    /**
     * Returns the character for a digit value: 0-25 yield 'a'-'z' and 26-35 yield '0'-'9'.
     * When $flag is true, 32 is subtracted from the character code (upper-casing the letters).
     *
     * @param int  $d
     * @param bool $flag
     *
     * @return string
     */
    private static function encodeDigit($d, $flag)
    {
        return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
    }

    /**
     * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
     * invalid byte sequences will be replaced by a U+FFFD replacement code point.
     *
     * @see https://encoding.spec.whatwg.org/#utf-8-decoder
     *
     * @param string $input
     *
     * @return array<int, int>
     */
    private static function utf8Decode($input)
    {
        $bytesSeen = 0;
        $bytesNeeded = 0;
        // Allowed range for the next continuation byte; narrowed after certain lead bytes.
        $lowerBoundary = 0x80;
        $upperBoundary = 0xBF;
        $codePoint = 0;
        $codePoints = [];
        $length = \strlen($input);

        for ($i = 0; $i < $length; ++$i) {
            $byte = \ord($input[$i]);

            if (0 === $bytesNeeded) {
                // ord() never returns a negative value, so only the upper bound needs checking.
                if ($byte <= 0x7F) {
                    $codePoints[] = $byte;

                    continue;
                }

                if ($byte >= 0xC2 && $byte <= 0xDF) {
                    $bytesNeeded = 1;
                    $codePoint = $byte & 0x1F;
                } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
                    if (0xE0 === $byte) {
                        $lowerBoundary = 0xA0;
                    } elseif (0xED === $byte) {
                        $upperBoundary = 0x9F;
                    }

                    $bytesNeeded = 2;
                    $codePoint = $byte & 0xF;
                } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
                    if (0xF0 === $byte) {
                        $lowerBoundary = 0x90;
                    } elseif (0xF4 === $byte) {
                        $upperBoundary = 0x8F;
                    }

                    $bytesNeeded = 3;
                    $codePoint = $byte & 0x7;
                } else {
                    $codePoints[] = 0xFFFD;
                }

                continue;
            }

            if ($byte < $lowerBoundary || $byte > $upperBoundary) {
                // Invalid continuation byte: emit U+FFFD for the broken sequence, reset the
                // decoder state and step back so this byte is examined again as a lead byte.
                $codePoint = 0;
                $bytesNeeded = 0;
                $bytesSeen = 0;
                $lowerBoundary = 0x80;
                $upperBoundary = 0xBF;
                --$i;
                $codePoints[] = 0xFFFD;

                continue;
            }

            $lowerBoundary = 0x80;
            $upperBoundary = 0xBF;
            $codePoint = ($codePoint << 6) | ($byte & 0x3F);

            if (++$bytesSeen !== $bytesNeeded) {
                continue;
            }

            $codePoints[] = $codePoint;
            $codePoint = 0;
            $bytesNeeded = 0;
            $bytesSeen = 0;
        }

        // String unexpectedly ended, so append a U+FFFD code point.
        if (0 !== $bytesNeeded) {
            $codePoints[] = 0xFFFD;
        }

        return $codePoints;
    }

    /**
     * Looks up the IDNA mapping-table status of a code point, loading the tables on first use.
     *
     * @param int  $codePoint
     * @param bool $useSTD3ASCIIRules
     *
     * @return array{status: string, mapping?: string}
     */
    private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
    {
        if (!self::$mappingTableLoaded) {
            self::$mappingTableLoaded = true;
            self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
            self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
            self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
            self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
            self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
            self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
        }

        if (isset(self::$mapped[$codePoint])) {
            return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
        }

        if (isset(self::$ignored[$codePoint])) {
            return ['status' => 'ignored'];
        }

        if (isset(self::$deviation[$codePoint])) {
            return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
        }

        if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
            return ['status' => 'disallowed'];
        }

        $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);

        if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
            // These entries are only disallowed when STD3 rules are on; otherwise they fall back
            // to "mapped" or "valid" depending on which table they came from.
            $status = 'disallowed';

            if (!$useSTD3ASCIIRules) {
                $status = $isDisallowedMapped ? 'mapped' : 'valid';
            }

            if ($isDisallowedMapped) {
                return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
            }

            return ['status' => $status];
        }

        // Anything not listed in a table is valid.
        return ['status' => 'valid'];
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Mon Nov 25 19:05:08 2024 | Cross-referenced by PHPXref 0.7.1 |