[ Index ] |
PHP Cross Reference of phpBB-3.2.11-deutsch |
[Summary view] [Print] [Text view]
<?php

/*
 * Copyright (C) 2016 Nicolas Grekas - p@tchwork.com
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the (at your option):
 * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
 * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
 */

namespace Patchwork\PHP\Shim;

/**
 * Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
 *
 * Implemented:
 * - mb_convert_encoding     - Convert character encoding
 * - mb_convert_variables    - Convert character code in variable(s)
 * - mb_decode_mimeheader    - Decode string in MIME header field
 * - mb_encode_mimeheader    - Encode string for MIME header XXX NATIVE IMPLEMENTATION IS REALLY BUGGED
 * - mb_convert_case         - Perform case folding on a string
 * - mb_get_info             - Get internal settings of mbstring
 * - mb_http_input           - Detect HTTP input character encoding
 * - mb_http_output          - Set/Get HTTP output character encoding
 * - mb_internal_encoding    - Set/Get internal character encoding
 * - mb_list_encodings       - Returns an array of all supported encodings
 * - mb_output_handler       - Callback function converts character encoding in output buffer
 * - mb_strlen               - Get string length
 * - mb_strpos               - Find position of first occurrence of string in a string
 * - mb_strrpos              - Find position of last occurrence of a string in a string
 * - mb_strtolower           - Make a string lowercase
 * - mb_strtoupper           - Make a string uppercase
 * - mb_substitute_character - Set/Get substitution character
 * - mb_substr               - Get part of string
 * - mb_stripos              - Finds position of first occurrence of a string within another, case insensitive
 * - mb_stristr              - Finds first occurrence of a string within another, case insensitive
 * - mb_strrchr              - Finds the last occurrence of a character in a string within another
 * - mb_strrichr             - Finds the last occurrence of a character in a string within another, case insensitive
 * - mb_strripos             - Finds position of last occurrence of a string within another, case insensitive
 * - mb_strstr               - Finds first occurrence of a string within another
 * - mb_strwidth             - Return width of string
 * - mb_substr_count         - Count the number of substring occurrences
 *
 * Not implemented:
 * - mb_convert_kana         - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
 * - mb_decode_numericentity - Decode HTML numeric string reference to character
 * - mb_encode_numericentity - Encode character to HTML numeric string reference
 * - mb_ereg_*               - Regular expression with multibyte support
 * - mb_parse_str            - Parse GET/POST/COOKIE data and set global variable
 * - mb_preferred_mime_name  - Get MIME charset string
 * - mb_regex_encoding       - Returns current encoding for multibyte regex as string
 * - mb_regex_set_options    - Set/Get the default options for mbregex functions
 * - mb_send_mail            - Send encoded mail
 * - mb_split                - Split multibyte string using regular expression
 * - mb_strcut               - Get part of string
 * - mb_strimwidth           - Get truncated string with specified width
 *
 * @internal
 */
class Mbstring
{
    /** Extra mode accepted by mb_convert_case(): special-case folding followed by lowercasing. */
    const MB_CASE_FOLD = PHP_INT_MAX;

    /** Detection order used by mb_detect_encoding() when no list is given. */
    private static $encodingList = array('ASCII', 'UTF-8');
    private static $language = 'neutral';
    /** Encoding used whenever a method receives a null $encoding. */
    private static $internalEncoding = 'UTF-8';
    /** Search/replace pairs applied by mb_convert_case() in MB_CASE_FOLD mode before lowercasing. */
    private static $caseFold = array(
        array('µ','ſ',"\xCD\x85",'ς',"\xCF\x90","\xCF\x91","\xCF\x95","\xCF\x96","\xCF\xB0","\xCF\xB1","\xCF\xB5","\xE1\xBA\x9B","\xE1\xBE\xBE"),
        array('μ','s','ι',       'σ','β',       'θ',       'φ',       'π',       'κ',       'ρ',       'ε',       "\xE1\xB9\xA1",'ι'),
    );

    /**
     * Converts $s from one encoding to another, mostly by delegating to iconv().
     *
     * BASE64 and HTML-ENTITIES (alias HTML) are handled here rather than by iconv.
     *
     * @param string            $s
     * @param string            $toEncoding
     * @param string|array|null $fromEncoding A single encoding, a comma separated list or an array of
     *                                        candidates (resolved through mb_detect_encoding()), or null
     *                                        for the internal encoding
     *
     * @return string|false The converted string, or whatever the underlying conversion function returns on failure
     */
    public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
    {
        // The explicit null check avoids handing null to strpos(), which is deprecated since PHP 8.1.
        if (is_array($fromEncoding) || (null !== $fromEncoding && false !== strpos($fromEncoding, ','))) {
            $fromEncoding = self::mb_detect_encoding($s, $fromEncoding);
        } else {
            $fromEncoding = self::getEncoding($fromEncoding);
        }

        $toEncoding = self::getEncoding($toEncoding);

        if ('BASE64' === $fromEncoding) {
            $s = base64_decode($s);
            // The decoded payload is taken to be in the target encoding already.
            $fromEncoding = $toEncoding;
        }

        if ('BASE64' === $toEncoding) {
            return base64_encode($s);
        }

        if ('HTML-ENTITIES' === $toEncoding || 'HTML' === $toEncoding) {
            if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
                $fromEncoding = 'Windows-1252';
            }
            if ('UTF-8' !== $fromEncoding) {
                $s = iconv($fromEncoding, 'UTF-8', $s);
            }

            // Only runs of non-ASCII bytes are rewritten as entities.
            return preg_replace_callback('/[\x80-\xFF]+/', array(__CLASS__, 'html_encoding_callback'), $s);
        }

        // 'HTML' is accepted as an alias of 'HTML-ENTITIES' here too, consistently with the branch above.
        if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
            $s = html_entity_decode($s, ENT_COMPAT, 'UTF-8');
            $fromEncoding = 'UTF-8';
        }

        return iconv($fromEncoding, $toEncoding, $s);
    }

    /**
     * Converts up to six variables in place (arrays are walked recursively).
     *
     * @return string|array|false $fromEncoding as given when every conversion succeeded, false otherwise
     */
    public static function mb_convert_variables($toEncoding, $fromEncoding, &$a = null, &$b = null, &$c = null, &$d = null, &$e = null, &$f = null)
    {
        $vars = array(&$a, &$b, &$c, &$d, &$e, &$f);

        $ok = true;
        array_walk_recursive($vars, function (&$v) use (&$ok, $toEncoding, $fromEncoding) {
            if (false === $v = Mbstring::mb_convert_encoding($v, $toEncoding, $fromEncoding)) {
                $ok = false;
            }
        });

        return $ok ? $fromEncoding : false;
    }

    /**
     * Decodes a MIME header field into the internal encoding using iconv_mime_decode().
     */
    public static function mb_decode_mimeheader($s)
    {
        return iconv_mime_decode($s, 2, self::$internalEncoding);
    }

    /**
     * Not supported: only raises an E_USER_WARNING and returns nothing.
     */
    public static function mb_encode_mimeheader($s, $charset = null, $transferEncoding = null, $linefeed = null, $indent = null)
    {
        trigger_error('mb_encode_mimeheader() is bugged. Please use iconv_mime_encode() instead', E_USER_WARNING);
    }

    /**
     * Changes the case of $s according to $mode.
     *
     * Upper/lower mappings are loaded once through getData(); when no mapping table
     * is available, characters are left untouched.
     *
     * @param string      $s
     * @param int         $mode     MB_CASE_UPPER, MB_CASE_TITLE or self::MB_CASE_FOLD; any other value lowercases
     * @param string|null $encoding Null means the internal encoding
     *
     * @return string|false
     */
    public static function mb_convert_case($s, $mode, $encoding = null)
    {
        // Appending '' casts the input to a string.
        if ('' === $s .= '') {
            return '';
        }

        $encoding = self::getEncoding($encoding);

        // The work is done on UTF-8; a null $encoding from here on means "no conversion back needed".
        if ('UTF-8' === $encoding) {
            $encoding = null;
        } else {
            $s = iconv($encoding, 'UTF-8', $s);
        }

        if (MB_CASE_TITLE == $mode) {
            $s = preg_replace_callback('/\b\p{Ll}/u', array(__CLASS__, 'title_case_upper'), $s);
            $s = preg_replace_callback('/\B[\p{Lu}\p{Lt}]+/u', array(__CLASS__, 'title_case_lower'), $s);
        } else {
            if (MB_CASE_UPPER == $mode) {
                static $upper = null;
                if (null === $upper) {
                    $upper = self::getData('upperCase');
                }
                $map = $upper;
            } else {
                if (self::MB_CASE_FOLD === $mode) {
                    $s = str_replace(self::$caseFold[0], self::$caseFold[1], $s);
                }

                static $lower = null;
                if (null === $lower) {
                    $lower = self::getData('lowerCase');
                }
                $map = $lower;
            }

            // Byte length of a UTF-8 sequence, keyed by the high nibble of its lead byte.
            static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

            $i = 0;
            $len = strlen($s);

            while ($i < $len) {
                if ($s[$i] < "\x80") {
                    $ulen = 1;
                } else {
                    $lead = $s[$i] & "\xF0";
                    // A stray continuation byte (0x80-0xBF) has no entry in the table. Step over
                    // it as a single byte so that malformed input cannot stall the loop.
                    $ulen = isset($ulenMask[$lead]) ? $ulenMask[$lead] : 1;
                }
                $uchr = substr($s, $i, $ulen);
                $i += $ulen;

                if (isset($map[$uchr])) {
                    $uchr = $map[$uchr];
                    $nlen = strlen($uchr);

                    if ($nlen == $ulen) {
                        // Same byte length: overwrite in place, walking backwards from the end of the sequence.
                        $nlen = $i;
                        do {
                            $s[--$nlen] = $uchr[--$ulen];
                        } while ($ulen);
                    } else {
                        // Different byte length: splice and shift both the cursor and the total length.
                        $s = substr_replace($s, $uchr, $i - $ulen, $ulen);
                        $len += $nlen - $ulen;
                        $i += $nlen - $ulen;
                    }
                }
            }
        }

        if (null === $encoding) {
            return $s;
        }

        return iconv('UTF-8', $encoding, $s);
    }

    /**
     * Gets the internal encoding (no argument) or sets it.
     *
     * @return string|bool The current encoding when reading; true/false when setting,
     *                     false meaning iconv rejected the encoding
     */
    public static function mb_internal_encoding($encoding = null)
    {
        if (null === $encoding) {
            return self::$internalEncoding;
        }

        $encoding = self::getEncoding($encoding);

        // Any encoding other than UTF-8 is probed with a dummy iconv() round trip.
        if ('UTF-8' === $encoding || false !== @iconv($encoding, $encoding, ' ')) {
            self::$internalEncoding = $encoding;

            return true;
        }

        return false;
    }

    /**
     * Gets the language (no argument) or sets it; only 'uni' and 'neutral' are accepted.
     *
     * @return string|bool
     */
    public static function mb_language($lang = null)
    {
        if (null === $lang) {
            return self::$language;
        }

        switch ($lang = strtolower($lang)) {
            case 'uni':
            case 'neutral':
                self::$language = $lang;

                return true;
        }

        return false;
    }

    /**
     * @return array Always array('UTF-8')
     */
    public static function mb_list_encodings()
    {
        return array('UTF-8');
    }

    /**
     * @return array|false array('utf8') for UTF-8/UTF8 (case insensitive), false for anything else
     */
    public static function mb_encoding_aliases($encoding)
    {
        switch (strtoupper($encoding)) {
            case 'UTF8':
            case 'UTF-8':
                return array('utf8');
        }

        return false;
    }

    /**
     * Checks whether $var is valid in $encoding (internal encoding when null).
     *
     * @return bool False when both arguments are null
     */
    public static function mb_check_encoding($var = null, $encoding = null)
    {
        if (null === $encoding) {
            if (null === $var) {
                return false;
            }
            $encoding = self::$internalEncoding;
        }

        // Fall back to an iconv() identity conversion when detection does not recognise the encoding.
        return self::mb_detect_encoding($var, array($encoding)) || false !== @iconv($encoding, $encoding, $var);
    }

    /**
     * Returns the first encoding of the list that $str is compatible with.
     *
     * ASCII and UTF-8 are actually checked; any ISO-8859-* entry is accepted without
     * inspection; every other entry is skipped. $strict is not used.
     *
     * @param string            $str
     * @param string|array|null $encodingList Comma separated string or array; null for the configured detect order
     * @param bool              $strict
     *
     * @return string|false
     */
    public static function mb_detect_encoding($str, $encodingList = null, $strict = false)
    {
        if (null === $encodingList) {
            $encodingList = self::$encodingList;
        } else {
            if (!is_array($encodingList)) {
                $encodingList = array_map('trim', explode(',', $encodingList));
            }
            $encodingList = array_map('strtoupper', $encodingList);
        }

        foreach ($encodingList as $enc) {
            switch ($enc) {
                case 'ASCII':
                    if (!preg_match('/[\x80-\xFF]/', $str)) {
                        return $enc;
                    }
                    break;

                case 'UTF8':
                case 'UTF-8':
                    // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
                    if (preg_match('//u', $str)) {
                        return 'UTF-8';
                    }
                    break;

                default:
                    if (0 === strncmp($enc, 'ISO-8859-', 9)) {
                        return $enc;
                    }
            }
        }

        return false;
    }

    /**
     * Gets the detection order (no argument) or sets it.
     *
     * @param string|array|null $encodingList Comma separated string or array of ASCII, UTF-8/UTF8 or ISO-8859-* names
     *
     * @return array|bool The current list when reading; true when set, false if an entry is not supported
     */
    public static function mb_detect_order($encodingList = null)
    {
        if (null === $encodingList) {
            return self::$encodingList;
        }

        if (!is_array($encodingList)) {
            $encodingList = array_map('trim', explode(',', $encodingList));
        }
        $encodingList = array_map('strtoupper', $encodingList);

        foreach ($encodingList as $enc) {
            switch ($enc) {
                // "default" is listed first on purpose: it only runs when none of the
                // labels below match, and then falls through them without doing anything.
                default:
                    if (strncmp($enc, 'ISO-8859-', 9)) {
                        return false;
                    }
                case 'ASCII':
                case 'UTF8':
                case 'UTF-8':
            }
        }

        self::$encodingList = $encodingList;

        return true;
    }

    /**
     * Returns the length of $s in characters, as computed by iconv_strlen().
     */
    public static function mb_strlen($s, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        return iconv_strlen($s, $encoding);
    }

    /**
     * Finds the character position of the first occurrence of $needle.
     *
     * @return int|false False (with an E_USER_WARNING) when $needle is empty
     */
    public static function mb_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        if ('' === $needle .= '') {
            trigger_error(__METHOD__.': Empty delimiter', E_USER_WARNING);

            return false;
        }

        return iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Finds the character position of the last occurrence of $needle.
     *
     * iconv_strrpos() takes no offset, so the haystack is trimmed first: a positive
     * offset drops that many leading characters (and is added back to the result),
     * a negative one drops trailing characters.
     *
     * @return int|false
     */
    public static function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        if ($offset != (int) $offset) {
            $offset = 0;
        } elseif ($offset = (int) $offset) {
            if ($offset < 0) {
                $haystack = self::mb_substr($haystack, 0, $offset, $encoding);
                $offset = 0;
            } else {
                $haystack = self::mb_substr($haystack, $offset, 2147483647, $encoding);
            }
        }

        $pos = iconv_strrpos($haystack, $needle, $encoding);

        return false !== $pos ? $offset + $pos : false;
    }

    /**
     * Shortcut for mb_convert_case() with MB_CASE_LOWER.
     */
    public static function mb_strtolower($s, $encoding = null)
    {
        return self::mb_convert_case($s, MB_CASE_LOWER, $encoding);
    }

    /**
     * Shortcut for mb_convert_case() with MB_CASE_UPPER.
     */
    public static function mb_strtoupper($s, $encoding = null)
    {
        return self::mb_convert_case($s, MB_CASE_UPPER, $encoding);
    }

    /**
     * Only the 'none' substitution character is supported.
     *
     * @return string|bool 'none' when called without argument, true when setting 'none'
     *                     (case insensitive), false for any other value
     */
    public static function mb_substitute_character($c = null)
    {
        // The null check keeps null away from strcasecmp() (deprecated since PHP 8.1); the result is unchanged.
        if (null !== $c && 0 === strcasecmp($c, 'none')) {
            return true;
        }

        return null !== $c ? false : 'none';
    }

    /**
     * Returns part of a string, counted in characters.
     *
     * Negative $start and $length are resolved against the string length before
     * calling iconv_substr().
     *
     * @return string Always a string: the result of iconv_substr() is concatenated with ''
     */
    public static function mb_substr($s, $start, $length = null, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        if ($start < 0) {
            $start = iconv_strlen($s, $encoding) + $start;
            if ($start < 0) {
                $start = 0;
            }
        }

        if (null === $length) {
            $length = 2147483647;
        } elseif ($length < 0) {
            $length = iconv_strlen($s, $encoding) + $length - $start;
            if ($length < 0) {
                return '';
            }
        }

        return iconv_substr($s, $start, $length, $encoding).'';
    }

    /**
     * Case insensitive mb_strpos(): both strings are case-folded first.
     *
     * @return int|false
     */
    public static function mb_stripos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
        $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);

        return self::mb_strpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Returns the part of $haystack before ($part = true) or from ($part = false)
     * the first case insensitive occurrence of $needle.
     *
     * @return string|false
     */
    public static function mb_stristr($haystack, $needle, $part = false, $encoding = null)
    {
        $pos = self::mb_stripos($haystack, $needle, 0, $encoding);

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Returns the part of $haystack before ($part = true) or from ($part = false)
     * the last occurrence of the first character of $needle.
     *
     * @return string|false
     */
    public static function mb_strrchr($haystack, $needle, $part = false, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        $needle = self::mb_substr($needle, 0, 1, $encoding);
        $pos = iconv_strrpos($haystack, $needle, $encoding);

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Case insensitive variant of mb_strrchr().
     *
     * @return string|false
     */
    public static function mb_strrichr($haystack, $needle, $part = false, $encoding = null)
    {
        $needle = self::mb_substr($needle, 0, 1, $encoding);
        // mb_strripos() takes ($haystack, $needle, $offset, $encoding): the explicit 0
        // offset is required, otherwise the encoding would be read as the offset.
        $pos = self::mb_strripos($haystack, $needle, 0, $encoding);

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Case insensitive mb_strrpos(): both strings are case-folded first.
     *
     * @return int|false
     */
    public static function mb_strripos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
        $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);

        return self::mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Returns the part of $haystack before ($part = true) or from ($part = false)
     * the first occurrence of $needle.
     *
     * The search is byte-wise (strpos/substr); $encoding is not used.
     *
     * @return string|false
     */
    public static function mb_strstr($haystack, $needle, $part = false, $encoding = null)
    {
        $pos = strpos($haystack, $needle);
        if (false === $pos) {
            return false;
        }
        if ($part) {
            return substr($haystack, 0, $pos);
        }

        return substr($haystack, $pos);
    }

    /**
     * Returns the shim's settings, either all of them or a single entry.
     *
     * @param string $type 'all' or one of the keys of the settings array
     *
     * @return array|mixed|false False when $type is not a known key
     */
    public static function mb_get_info($type = 'all')
    {
        $info = array(
            'internal_encoding' => self::$internalEncoding,
            'http_output' => 'pass',
            'http_output_conv_mimetypes' => '^(text/|application/xhtml\+xml)',
            'func_overload' => 0,
            'func_overload_list' => 'no overload',
            'mail_charset' => 'UTF-8',
            'mail_header_encoding' => 'BASE64',
            'mail_body_encoding' => 'BASE64',
            'illegal_chars' => 0,
            'encoding_translation' => 'Off',
            'language' => self::$language,
            'detect_order' => self::$encodingList,
            'substitute_character' => 'none',
            'strict_detection' => 'Off',
        );

        if ('all' === $type) {
            return $info;
        }
        if (isset($info[$type])) {
            return $info[$type];
        }

        return false;
    }

    /**
     * @return false Always
     */
    public static function mb_http_input($type = '')
    {
        return false;
    }

    /**
     * Only the 'pass' output encoding is supported.
     *
     * @return string|bool 'pass' when reading; when setting, whether $encoding is exactly 'pass'
     */
    public static function mb_http_output($encoding = null)
    {
        return null !== $encoding ? 'pass' === $encoding : 'pass';
    }

    /**
     * Returns the display width of $s: characters in the ranges matched below count for two, all others for one.
     *
     * @return int
     */
    public static function mb_strwidth($s, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        if ('UTF-8' !== $encoding) {
            $s = iconv($encoding, 'UTF-8', $s);
        }

        // Wide characters are stripped and counted ($wide); what remains is measured with iconv_strlen().
        $s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);

        return ($wide << 1) + iconv_strlen($s, 'UTF-8');
    }

    /**
     * Counts the occurrences of $needle with the byte-wise substr_count(); $encoding is not used.
     *
     * @return int
     */
    public static function mb_substr_count($haystack, $needle, $encoding = null)
    {
        return substr_count($haystack, $needle);
    }

    /**
     * Output buffer callback that returns the contents unchanged.
     */
    public static function mb_output_handler($contents, $status)
    {
        return $contents;
    }

    /**
     * Shared tail of the mb_str*str()/mb_str*chr() methods.
     *
     * @param int|false $pos  Character position of the match, false when there was none
     * @param bool      $part True for the portion before $pos, false for the portion from $pos on
     *
     * @return string|false
     */
    private static function getSubpart($pos, $part, $haystack, $encoding)
    {
        if (false === $pos) {
            return false;
        }
        if ($part) {
            return self::mb_substr($haystack, 0, $pos, $encoding);
        }

        return self::mb_substr($haystack, $pos, null, $encoding);
    }

    /**
     * preg_replace_callback() handler that turns a run of non-ASCII UTF-8 bytes into HTML entities.
     *
     * htmlentities() produces the named entities it knows; every multi-byte
     * sequence left over is decoded to its code point and emitted as "&#N;".
     *
     * @param array $m Match array, $m[0] being the matched bytes
     *
     * @return string
     */
    private static function html_encoding_callback($m)
    {
        $i = 1;
        $entities = '';
        // unpack('C*') yields a 1-based array of byte values.
        $m = unpack('C*', htmlentities($m[0], ENT_COMPAT, 'UTF-8'));

        while (isset($m[$i])) {
            if (0x80 > $m[$i]) {
                $entities .= chr($m[$i++]);
                continue;
            }
            if (0xF0 <= $m[$i]) {
                $c = (($m[$i++] - 0xF0) << 18) + (($m[$i++] - 0x80) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
            } elseif (0xE0 <= $m[$i]) {
                $c = (($m[$i++] - 0xE0) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
            } else {
                $c = (($m[$i++] - 0xC0) << 6) + $m[$i++] - 0x80;
            }

            $entities .= '&#'.$c.';';
        }

        return $entities;
    }

    /**
     * preg_replace_callback() handler used for title casing: lowercases the match.
     */
    private static function title_case_lower($s)
    {
        return self::mb_convert_case($s[0], MB_CASE_LOWER, 'UTF-8');
    }

    /**
     * preg_replace_callback() handler used for title casing: uppercases the match.
     */
    private static function title_case_upper($s)
    {
        return self::mb_convert_case($s[0], MB_CASE_UPPER, 'UTF-8');
    }

    /**
     * Loads a serialized table from the unidata directory located next to this file.
     *
     * @param string $file Base name of the table, without directory or ".ser" extension
     *
     * @return mixed|false The unserialized data, false when the file does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/unidata/'.$file.'.ser')) {
            return unserialize(file_get_contents($file));
        }

        return false;
    }

    /**
     * Normalizes an encoding name: null becomes the internal encoding, names are
     * uppercased, 8BIT/BINARY map to CP850 and UTF8 to UTF-8.
     *
     * @param string|null $encoding
     *
     * @return string
     */
    private static function getEncoding($encoding)
    {
        if (null === $encoding) {
            return self::$internalEncoding;
        }

        $encoding = strtoupper($encoding);

        if ('8BIT' === $encoding || 'BINARY' === $encoding) {
            return 'CP850';
        }
        if ('UTF8' === $encoding) {
            return 'UTF-8';
        }

        return $encoding;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Nov 11 20:33:01 2020 | Cross-referenced by PHPXref 0.7.1 |