[ Index ] |
PHP Cross Reference of phpBB-3.2.11-deutsch |
[Summary view] [Print] [Text view]
<?php

/*
 * Copyright (C) 2016 Nicolas Grekas - p@tchwork.com
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the (at your option):
 * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
 * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
 */

namespace Patchwork\PHP\Shim;

/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - ob_iconv_handler          - Convert character encoding as output buffer handler
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @internal
 */
class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    /** @var string Value reported/stored for the 'input_encoding' setting. */
    public static $inputEncoding = 'utf-8';

    /** @var string Value for the 'output_encoding' setting; target charset of ob_iconv_handler(). */
    public static $outputEncoding = 'utf-8';

    /** @var string Value for the 'internal_encoding' setting; default charset of the string helpers. */
    public static $internalEncoding = 'utf-8';

    /** @var array Lower-case charset alias => canonical charset name used to locate the map files. */
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );

    /** @var array Lazily loaded transliteration table (filled from the 'translit' data file). */
    private static $translitMap = array();

    /** @var array Cache of loaded charset maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1"). */
    private static $convertMap = array();

    private static $errorHandler;
    private static $lastError;

    /** @var array High nibble of a UTF-8 lead byte => byte length of the sequence it starts. */
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    /** @var mixed Result of the last whole-string UTF-8 check done by iconv(); read by the UTF-8 walkers. */
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, going through UTF-8.
     *
     * Charset names are case-insensitive and resolved through self::$alias.
     * An empty charset name is treated as 'iso-8859-1'. The "//TRANSLIT" and
     * "//IGNORE" suffixes are honoured on $outCharset and stripped (without
     * effect) from $inCharset.
     *
     * @param string $inCharset  Charset of $str
     * @param string $outCharset Target charset, optionally suffixed with //TRANSLIT and/or //IGNORE
     * @param string $str        String to convert
     *
     * @return string|false The converted string, or false on failure (an unknown
     *                      charset or an illegal input character triggers an error first)
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        // The concatenation casts $str to a string before the emptiness check.
        if ('' === $str .= '') {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        if ('//translit' === substr($outCharset, -10)) {
            $translit = '//TRANSLIT';
            $outCharset = substr($outCharset, 0, -10);
        }

        if ('//ignore' === substr($outCharset, -8)) {
            $ignore = '//IGNORE';
            $outCharset = substr($outCharset, 0, -8);
        }

        if ('//translit' === substr($inCharset, -10)) {
            $inCharset = substr($inCharset, 0, -10);
        }
        if ('//ignore' === substr($inCharset, -8)) {
            $inCharset = substr($inCharset, 0, -8);
        }

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            // Whatever came out of the map is taken as valid UTF-8.
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 intermediate to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field of a MIME header block.
     *
     * Line endings are normalised to "\n", anything after the first blank
     * line is dropped, and folded lines (continuations starting with a space
     * or a tab) are kept with the field they belong to. Lines without a colon
     * are skipped.
     *
     * @param string      $str     Raw header block
     * @param int         $mode    Bitmask passed to iconv_mime_decode()
     * @param string|null $charset Target charset; defaults to self::$internalEncoding
     *
     * @return array|false Field name => value, or list of values when a field
     *                     is repeated; false as soon as one field fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $parts = explode("\n\n", $str, 2);

        $headers = array();

        foreach (preg_split('/\n(?![ \t])/', $parts[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $field = explode(':', $line, 2);

            if (2 === count($field)) {
                if (isset($headers[$field[0]])) {
                    // Repeated field: promote the stored value to a list.
                    if (!is_array($headers[$field[0]])) {
                        $headers[$field[0]] = array($headers[$field[0]]);
                    }
                    $headers[$field[0]][] = ltrim($field[1]);
                } else {
                    $headers[$field[0]] = ltrim($field[1]);
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field containing "=?charset?B|Q?data?=" encoded words.
     *
     * Only the first (possibly folded) header line of $str is considered.
     *
     * @param string      $str     Raw header field
     * @param int         $mode    Bitmask; with ICONV_MIME_DECODE_CONTINUE_ON_ERROR, "//IGNORE"
     *                             is appended to the target charset and encoded words in an
     *                             unknown charset are copied through undecoded
     * @param string|null $charset Target charset; defaults to self::$internalEncoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // After the split: [0] leading text, then repeating groups of
        // (charset, scheme, payload, text following the encoded word).
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "_" stands for a space and "=XX" for a byte; literal "%" is
                // protected first so that rawurldecode() can do the hex decoding.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                // Text between encoded words is appended unless it is whitespace only.
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all three of them.
     *
     * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
     *                     any other value returns every setting
     *
     * @return string|array The requested setting, or an array of the three settings
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Changes one of the configured encodings. The charset name is stored as given.
     *
     * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
     * @param string $charset New value
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field made of "=?charset?scheme?data?=" encoded words.
     *
     * Recognised $pref keys (defaults in parentheses): 'scheme' ('B'; only its
     * first letter is used and 'Q' selects quoted-printable), 'input-charset' and
     * 'output-charset' (self::$internalEncoding), 'line-length' (76) and
     * 'line-break-chars' ("\r\n").
     *
     * @param string     $fieldName  Field name; replaced by '' when it contains bytes >= 0x80
     * @param string     $fieldValue Field value, in the input charset
     * @param array|null $pref       Preferences overriding the defaults
     *
     * @return string|false The encoded "name: value" field, or false when a conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Work character by character so that no encoded word splits a character.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = strlen($fieldName) + 2 + strlen($lineStart) + 2;
        $lineOffset = strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is what the pending data would look like with $c added: for Q only the
            // escaped character (the running length is tracked in $lineLength), for B the
            // whole re-encoded line.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            // isset() on a string offset is a length test: the line would get too long,
            // so flush what has been collected and start a new encoded word.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Output buffer callback converting from the internal to the output encoding.
     *
     * @param string $buffer Buffered output
     * @param int    $mode   Unused
     *
     * @return string|false The result of iconv() on the buffer
     */
    public static function ob_iconv_handler($buffer, $mode)
    {
        return self::iconv(self::$internalEncoding, self::$outputEncoding, $buffer);
    }

    /**
     * Counts the characters of a string.
     *
     * Delegates to strlen1() when the "xml" extension is loaded (it provides
     * utf8_decode()), to strlen2() otherwise.
     *
     * @param string      $s        Input string
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false The character count, or false when the conversion to UTF-8 fails
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            $hasXml = extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode(), which yields one byte per character.
     *
     * @param string      $s        Input string
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false The character count, or false when the conversion to UTF-8 fails
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return strlen(utf8_decode($s));
    }

    /**
     * Character count obtained by hopping from lead byte to lead byte.
     *
     * @param string      $s        Input string
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false The character count, or false when the conversion to UTF-8 fails
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = strlen($s);

        while ($i < $len) {
            // Bytes whose high nibble is not a known lead pattern advance by one.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of a needle.
     *
     * @param string      $haystack String to search in
     * @param string      $needle   String to search for
     * @param int         $offset   Character offset at which the search starts
     * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false The position counted in characters from the start of $haystack, or false
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        $pos = strpos($haystack, $needle);

        // Translate the byte offset into a character offset.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of a needle.
     *
     * @param string      $haystack String to search in
     * @param string      $needle   String to search for; an empty needle is never found
     * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false The position counted in characters, or false
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; measuring the whole haystack
        // here would report its length instead of the position.
        return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
    }

    /**
     * Cuts out a part of a string, with offsets counted in characters.
     *
     * @param string      $s        Input string
     * @param int         $start    Start position; negative values count from the end
     * @param int         $length   Maximum length; negative values stop that many characters
     *                              before the end
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return string|false The extracted part in the original charset, or false when
     *                      $start is out of range, the resulting length is negative
     *                      or a conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: no conversion needed on the way in or out.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s .= '';
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end of the string.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/u';

        $s = preg_match($rx, $s, $match) ? $match[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a conversion map, using self::$convertMap as a cache.
     *
     * When no "to." data file exists for a charset, the map is built by
     * flipping the corresponding "from." map.
     *
     * @param string     $type    'from.' (charset to UTF-8) or 'to.' (UTF-8 to charset)
     * @param string     $charset Canonical charset name
     * @param array|null $map     Receives the map
     *
     * @return bool Whether a map is available
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies a string sequence by sequence, checking each multi-byte sequence unless
     * self::$isValidUtf8 says the whole string was already validated.
     *
     * @param string $str    Input string
     * @param string $ignore Non-empty to skip illegal bytes instead of failing
     *
     * @return string|false The copied string, or false (after triggering an error)
     *                      on the first illegal byte when $ignore is empty
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is a same-sized buffer written in place, then truncated to the $j bytes kept.
        $u = $str;
        $i = $j = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                } else {
                    $i += $ulen;
                }

                $u[$j++] = $uchr[0];

                // Copies the remaining bytes of the sequence; the "0 !==" comparisons are
                // always true and only chain the assignments into a single expression.
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 with a "from." map, trying two-byte keys before one-byte keys.
     *
     * @param string $result Receives the converted text (appended to)
     * @param array  $map    Byte sequence => UTF-8 character
     * @param string $str    Input string
     * @param string $ignore Non-empty to drop unmapped bytes instead of failing
     *
     * @return bool False (after triggering an error) on an unmapped byte when $ignore is empty
     */
    private static function mapToUtf8(&$result, $map, $str, $ignore)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string to another charset with a "to." map.
     *
     * @param string $result   Receives the converted text (appended to)
     * @param array  $map      UTF-8 character => byte sequence in the target charset
     * @param string $str      UTF-8 input
     * @param string $ignore   Non-empty to drop characters that cannot be converted
     * @param string $translit Non-empty to try a transliteration for unmapped characters
     *
     * @return bool False when a character cannot be converted and $ignore is empty
     */
    private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    // No table entry: decompose and keep the leading byte if it is ASCII.
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                }

                // Push the replacement back in front of the unread input and restart there.
                $str = $uchr.substr($str, $i);
                $len = strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() helper: renders the matched byte as "=XX" (upper-case hex).
     *
     * @param array $m Match array; $m[0] is the byte to escape
     *
     * @return string
     */
    private static function qpByteCallback($m)
    {
        return '='.strtoupper(dechex(ord($m[0])));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters, split into
     * ".{65535}" chunks so that no single quantifier exceeds 65535.
     *
     * @param int $offset Number of characters to match
     *
     * @return string
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Reads a serialized data file from the charset/ directory next to this file.
     *
     * NOTE(review): $file is derived from the charset name, which can originate from
     * caller input or MIME header text, and the content goes through unserialize().
     * Confirm that charset names cannot point outside charset/ on the target platforms.
     *
     * @param string $file Base name of the data file, without the ".ser" extension
     *
     * @return mixed The unserialized content, or false when the file does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/charset/'.$file.'.ser')) {
            return unserialize(file_get_contents($file));
        }

        return false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Nov 11 20:33:01 2020 | Cross-referenced by PHPXref 0.7.1 |