[ Index ]

PHP Cross Reference of phpBB-3.2.11-deutsch

title

Body

[close]

/vendor/patchwork/utf8/src/Patchwork/PHP/Shim/ -> Mbstring.php (source)

   1  <?php
   2  
   3  /*
   4   * Copyright (C) 2016 Nicolas Grekas - p@tchwork.com
   5   *
   6   * This library is free software; you can redistribute it and/or modify it
   7   * under the terms of the (at your option):
   8   * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
   9   * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
  10   */
  11  
  12  namespace Patchwork\PHP\Shim;
  13  
  14  /**
  15   * Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
  16   *
  17   * Implemented:
  18   * - mb_convert_encoding     - Convert character encoding
  19   * - mb_convert_variables    - Convert character code in variable(s)
  20   * - mb_decode_mimeheader    - Decode string in MIME header field
  21   * - mb_encode_mimeheader    - Encode string for MIME header XXX NATIVE IMPLEMENTATION IS REALLY BUGGED
  22   * - mb_convert_case         - Perform case folding on a string
  23   * - mb_get_info             - Get internal settings of mbstring
  24   * - mb_http_input           - Detect HTTP input character encoding
  25   * - mb_http_output          - Set/Get HTTP output character encoding
  26   * - mb_internal_encoding    - Set/Get internal character encoding
  27   * - mb_list_encodings       - Returns an array of all supported encodings
  28   * - mb_output_handler       - Callback function converts character encoding in output buffer
  29   * - mb_strlen               - Get string length
  30   * - mb_strpos               - Find position of first occurrence of string in a string
  31   * - mb_strrpos              - Find position of last occurrence of a string in a string
  32   * - mb_strtolower           - Make a string lowercase
  33   * - mb_strtoupper           - Make a string uppercase
  34   * - mb_substitute_character - Set/Get substitution character
  35   * - mb_substr               - Get part of string
  36   * - mb_stripos              - Finds position of first occurrence of a string within another, case insensitive
  37   * - mb_stristr              - Finds first occurrence of a string within another, case insensitive
  38   * - mb_strrchr              - Finds the last occurrence of a character in a string within another
  39   * - mb_strrichr             - Finds the last occurrence of a character in a string within another, case insensitive
  40   * - mb_strripos             - Finds position of last occurrence of a string within another, case insensitive
  41   * - mb_strstr               - Finds first occurrence of a string within another
  42   * - mb_strwidth             - Return width of string
  43   * - mb_substr_count         - Count the number of substring occurrences
  44   *
  45   * Not implemented:
  46   * - mb_convert_kana         - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
  47   * - mb_decode_numericentity - Decode HTML numeric string reference to character
  48   * - mb_encode_numericentity - Encode character to HTML numeric string reference
  49   * - mb_ereg_*               - Regular expression with multibyte support
  50   * - mb_parse_str            - Parse GET/POST/COOKIE data and set global variable
  51   * - mb_preferred_mime_name  - Get MIME charset string
  52   * - mb_regex_encoding       - Returns current encoding for multibyte regex as string
  53   * - mb_regex_set_options    - Set/Get the default options for mbregex functions
  54   * - mb_send_mail            - Send encoded mail
  55   * - mb_split                - Split multibyte string using regular expression
  56   * - mb_strcut               - Get part of string
  57   * - mb_strimwidth           - Get truncated string with specified width
  58   *
  59   * @internal
  60   */
  61  class Mbstring
  62  {
  63      const MB_CASE_FOLD = PHP_INT_MAX;
  64  
  65      private static $encodingList = array('ASCII', 'UTF-8');
  66      private static $language = 'neutral';
  67      private static $internalEncoding = 'UTF-8';
  68      private static $caseFold = array(
  69          array('µ','ſ',"\xCD\x85",'ς',"\xCF\x90","\xCF\x91","\xCF\x95","\xCF\x96","\xCF\xB0","\xCF\xB1","\xCF\xB5","\xE1\xBA\x9B","\xE1\xBE\xBE"),
  70          array('μ','s','ι',       'σ','β',       'θ',       'φ',       'π',       'κ',       'ρ',       'ε',       "\xE1\xB9\xA1",'ι'),
  71      );
  72  
  73      public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
  74      {
  75          if (is_array($fromEncoding) || false !== strpos($fromEncoding, ',')) {
  76              $fromEncoding = self::mb_detect_encoding($s, $fromEncoding);
  77          } else {
  78              $fromEncoding = self::getEncoding($fromEncoding);
  79          }
  80  
  81          $toEncoding = self::getEncoding($toEncoding);
  82  
  83          if ('BASE64' === $fromEncoding) {
  84              $s = base64_decode($s);
  85              $fromEncoding = $toEncoding;
  86          }
  87  
  88          if ('BASE64' === $toEncoding) {
  89              return base64_encode($s);
  90          }
  91  
  92          if ('HTML-ENTITIES' === $toEncoding || 'HTML' === $toEncoding) {
  93              if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
  94                  $fromEncoding = 'Windows-1252';
  95              }
  96              if ('UTF-8' !== $fromEncoding) {
  97                  $s = iconv($fromEncoding, 'UTF-8', $s);
  98              }
  99  
 100              return preg_replace_callback('/[\x80-\xFF]+/', array(__CLASS__, 'html_encoding_callback'), $s);
 101          }
 102  
 103          if ('HTML-ENTITIES' === $fromEncoding) {
 104              $s = html_entity_decode($s, ENT_COMPAT, 'UTF-8');
 105              $fromEncoding = 'UTF-8';
 106          }
 107  
 108          return iconv($fromEncoding, $toEncoding, $s);
 109      }
 110  
 111      public static function mb_convert_variables($toEncoding, $fromEncoding, &$a = null, &$b = null, &$c = null, &$d = null, &$e = null, &$f = null)
 112      {
 113          $vars = array(&$a, &$b, &$c, &$d, &$e, &$f);
 114  
 115          $ok = true;
 116          array_walk_recursive($vars, function (&$v) use (&$ok, $toEncoding, $fromEncoding) {
 117              if (false === $v = Mbstring::mb_convert_encoding($v, $toEncoding, $fromEncoding)) {
 118                  $ok = false;
 119              }
 120          });
 121  
 122          return $ok ? $fromEncoding : false;
 123      }
 124  
 125      public static function mb_decode_mimeheader($s)
 126      {
 127          return iconv_mime_decode($s, 2, self::$internalEncoding);
 128      }
 129  
 130      public static function mb_encode_mimeheader($s, $charset = null, $transferEncoding = null, $linefeed = null, $indent = null)
 131      {
 132          trigger_error('mb_encode_mimeheader() is bugged. Please use iconv_mime_encode() instead', E_USER_WARNING);
 133      }
 134  
 135      public static function mb_convert_case($s, $mode, $encoding = null)
 136      {
 137          if ('' === $s .= '') {
 138              return '';
 139          }
 140  
 141          $encoding = self::getEncoding($encoding);
 142  
 143          if ('UTF-8' === $encoding) {
 144              $encoding = null;
 145          } else {
 146              $s = iconv($encoding, 'UTF-8', $s);
 147          }
 148  
 149          if (MB_CASE_TITLE == $mode) {
 150              $s = preg_replace_callback('/\b\p{Ll}/u', array(__CLASS__, 'title_case_upper'), $s);
 151              $s = preg_replace_callback('/\B[\p{Lu}\p{Lt}]+/u', array(__CLASS__, 'title_case_lower'), $s);
 152          } else {
 153              if (MB_CASE_UPPER == $mode) {
 154                  static $upper = null;
 155                  if (null === $upper) {
 156                      $upper = self::getData('upperCase');
 157                  }
 158                  $map = $upper;
 159              } else {
 160                  if (self::MB_CASE_FOLD === $mode) {
 161                      $s = str_replace(self::$caseFold[0], self::$caseFold[1], $s);
 162                  }
 163  
 164                  static $lower = null;
 165                  if (null === $lower) {
 166                      $lower = self::getData('lowerCase');
 167                  }
 168                  $map = $lower;
 169              }
 170  
 171              static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
 172  
 173              $i = 0;
 174              $len = strlen($s);
 175  
 176              while ($i < $len) {
 177                  $ulen = $s[$i] < "\x80" ? 1 : $ulenMask[$s[$i] & "\xF0"];
 178                  $uchr = substr($s, $i, $ulen);
 179                  $i += $ulen;
 180  
 181                  if (isset($map[$uchr])) {
 182                      $uchr = $map[$uchr];
 183                      $nlen = strlen($uchr);
 184  
 185                      if ($nlen == $ulen) {
 186                          $nlen = $i;
 187                          do {
 188                              $s[--$nlen] = $uchr[--$ulen];
 189                          } while ($ulen);
 190                      } else {
 191                          $s = substr_replace($s, $uchr, $i - $ulen, $ulen);
 192                          $len += $nlen - $ulen;
 193                          $i   += $nlen - $ulen;
 194                      }
 195                  }
 196              }
 197          }
 198  
 199          if (null === $encoding) {
 200              return $s;
 201          }
 202  
 203          return iconv('UTF-8', $encoding, $s);
 204      }
 205  
 206      public static function mb_internal_encoding($encoding = null)
 207      {
 208          if (null === $encoding) {
 209              return self::$internalEncoding;
 210          }
 211  
 212          $encoding = self::getEncoding($encoding);
 213  
 214          if ('UTF-8' === $encoding || false !== @iconv($encoding, $encoding, ' ')) {
 215              self::$internalEncoding = $encoding;
 216  
 217              return true;
 218          }
 219  
 220          return false;
 221      }
 222  
 223      public static function mb_language($lang = null)
 224      {
 225          if (null === $lang) {
 226              return self::$language;
 227          }
 228  
 229          switch ($lang = strtolower($lang)) {
 230              case 'uni':
 231              case 'neutral':
 232                  self::$language = $lang;
 233  
 234                  return true;
 235          }
 236  
 237          return false;
 238      }
 239  
 240      public static function mb_list_encodings()
 241      {
 242          return array('UTF-8');
 243      }
 244  
 245      public static function mb_encoding_aliases($encoding)
 246      {
 247          switch (strtoupper($encoding)) {
 248              case 'UTF8':
 249              case 'UTF-8':
 250                  return array('utf8');
 251          }
 252  
 253          return false;
 254      }
 255  
 256      public static function mb_check_encoding($var = null, $encoding = null)
 257      {
 258          if (null === $encoding) {
 259              if (null === $var) {
 260                  return false;
 261              }
 262              $encoding = self::$internalEncoding;
 263          }
 264  
 265          return self::mb_detect_encoding($var, array($encoding)) || false !== @iconv($encoding, $encoding, $var);
 266      }
 267  
 268      public static function mb_detect_encoding($str, $encodingList = null, $strict = false)
 269      {
 270          if (null === $encodingList) {
 271              $encodingList = self::$encodingList;
 272          } else {
 273              if (!is_array($encodingList)) {
 274                  $encodingList = array_map('trim', explode(',', $encodingList));
 275              }
 276              $encodingList = array_map('strtoupper', $encodingList);
 277          }
 278  
 279          foreach ($encodingList as $enc) {
 280              switch ($enc) {
 281                  case 'ASCII':
 282                      if (!preg_match('/[\x80-\xFF]/', $str)) {
 283                          return $enc;
 284                      }
 285                      break;
 286  
 287                  case 'UTF8':
 288                  case 'UTF-8':
 289                      if (preg_match('//u', $str)) {
 290                          return 'UTF-8';
 291                      }
 292                      break;
 293  
 294                  default:
 295                      if (0 === strncmp($enc, 'ISO-8859-', 9)) {
 296                          return $enc;
 297                      }
 298              }
 299          }
 300  
 301          return false;
 302      }
 303  
 304      public static function mb_detect_order($encodingList = null)
 305      {
 306          if (null === $encodingList) {
 307              return self::$encodingList;
 308          }
 309  
 310          if (!is_array($encodingList)) {
 311              $encodingList = array_map('trim', explode(',', $encodingList));
 312          }
 313          $encodingList = array_map('strtoupper', $encodingList);
 314  
 315          foreach ($encodingList as $enc) {
 316              switch ($enc) {
 317                  default:
 318                      if (strncmp($enc, 'ISO-8859-', 9)) {
 319                          return false;
 320                      }
 321                  case 'ASCII':
 322                  case 'UTF8':
 323                  case 'UTF-8':
 324              }
 325          }
 326  
 327          self::$encodingList = $encodingList;
 328  
 329          return true;
 330      }
 331  
 332      public static function mb_strlen($s, $encoding = null)
 333      {
 334          $encoding = self::getEncoding($encoding);
 335  
 336          return iconv_strlen($s, $encoding);
 337      }
 338  
 339      public static function mb_strpos($haystack, $needle, $offset = 0, $encoding = null)
 340      {
 341          $encoding = self::getEncoding($encoding);
 342  
 343          if ('' === $needle .= '') {
 344              trigger_error(__METHOD__.': Empty delimiter', E_USER_WARNING);
 345  
 346              return false;
 347          }
 348  
 349          return iconv_strpos($haystack, $needle, $offset, $encoding);
 350      }
 351  
 352      public static function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null)
 353      {
 354          $encoding = self::getEncoding($encoding);
 355  
 356          if ($offset != (int) $offset) {
 357              $offset = 0;
 358          } elseif ($offset = (int) $offset) {
 359              if ($offset < 0) {
 360                  $haystack = self::mb_substr($haystack, 0, $offset, $encoding);
 361                  $offset = 0;
 362              } else {
 363                  $haystack = self::mb_substr($haystack, $offset, 2147483647, $encoding);
 364              }
 365          }
 366  
 367          $pos = iconv_strrpos($haystack, $needle, $encoding);
 368  
 369          return false !== $pos ? $offset + $pos : false;
 370      }
 371  
 372      public static function mb_strtolower($s, $encoding = null)
 373      {
 374          return self::mb_convert_case($s, MB_CASE_LOWER, $encoding);
 375      }
 376  
 377      public static function mb_strtoupper($s, $encoding = null)
 378      {
 379          return self::mb_convert_case($s, MB_CASE_UPPER, $encoding);
 380      }
 381  
 382      public static function mb_substitute_character($c = null)
 383      {
 384          if (0 === strcasecmp($c, 'none')) {
 385              return true;
 386          }
 387  
 388          return null !== $c ? false : 'none';
 389      }
 390  
 391      public static function mb_substr($s, $start, $length = null, $encoding = null)
 392      {
 393          $encoding = self::getEncoding($encoding);
 394  
 395          if ($start < 0) {
 396              $start = iconv_strlen($s, $encoding) + $start;
 397              if ($start < 0) {
 398                  $start = 0;
 399              }
 400          }
 401  
 402          if (null === $length) {
 403              $length = 2147483647;
 404          } elseif ($length < 0) {
 405              $length = iconv_strlen($s, $encoding) + $length - $start;
 406              if ($length < 0) {
 407                  return '';
 408              }
 409          }
 410  
 411          return iconv_substr($s, $start, $length, $encoding).'';
 412      }
 413  
 414      public static function mb_stripos($haystack, $needle, $offset = 0, $encoding = null)
 415      {
 416          $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
 417          $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
 418  
 419          return self::mb_strpos($haystack, $needle, $offset, $encoding);
 420      }
 421  
 422      public static function mb_stristr($haystack, $needle, $part = false, $encoding = null)
 423      {
 424          $pos = self::mb_stripos($haystack, $needle, 0, $encoding);
 425  
 426          return self::getSubpart($pos, $part, $haystack, $encoding);
 427      }
 428  
 429      public static function mb_strrchr($haystack, $needle, $part = false, $encoding = null)
 430      {
 431          $encoding = self::getEncoding($encoding);
 432          $needle = self::mb_substr($needle, 0, 1, $encoding);
 433          $pos = iconv_strrpos($haystack, $needle, $encoding);
 434  
 435          return self::getSubpart($pos, $part, $haystack, $encoding);
 436      }
 437  
 438      public static function mb_strrichr($haystack, $needle, $part = false, $encoding = null)
 439      {
 440          $needle = self::mb_substr($needle, 0, 1, $encoding);
 441          $pos = self::mb_strripos($haystack, $needle, $encoding);
 442  
 443          return self::getSubpart($pos, $part, $haystack, $encoding);
 444      }
 445  
 446      public static function mb_strripos($haystack, $needle, $offset = 0, $encoding = null)
 447      {
 448          $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
 449          $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
 450  
 451          return self::mb_strrpos($haystack, $needle, $offset, $encoding);
 452      }
 453  
 454      public static function mb_strstr($haystack, $needle, $part = false, $encoding = null)
 455      {
 456          $pos = strpos($haystack, $needle);
 457          if (false === $pos) {
 458              return false;
 459          }
 460          if ($part) {
 461              return substr($haystack, 0, $pos);
 462          }
 463  
 464          return substr($haystack, $pos);
 465      }
 466  
 467      public static function mb_get_info($type = 'all')
 468      {
 469          $info = array(
 470              'internal_encoding' => self::$internalEncoding,
 471              'http_output' => 'pass',
 472              'http_output_conv_mimetypes' => '^(text/|application/xhtml\+xml)',
 473              'func_overload' => 0,
 474              'func_overload_list' => 'no overload',
 475              'mail_charset' => 'UTF-8',
 476              'mail_header_encoding' => 'BASE64',
 477              'mail_body_encoding' => 'BASE64',
 478              'illegal_chars' => 0,
 479              'encoding_translation' => 'Off',
 480              'language' => self::$language,
 481              'detect_order' => self::$encodingList,
 482              'substitute_character' => 'none',
 483              'strict_detection' => 'Off',
 484          );
 485  
 486          if ('all' === $type) {
 487              return $info;
 488          }
 489          if (isset($info[$type])) {
 490              return $info[$type];
 491          }
 492  
 493          return false;
 494      }
 495  
 496      public static function mb_http_input($type = '')
 497      {
 498          return false;
 499      }
 500  
 501      public static function mb_http_output($encoding = null)
 502      {
 503          return null !== $encoding ? 'pass' === $encoding : 'pass';
 504      }
 505  
 506      public static function mb_strwidth($s, $encoding = null)
 507      {
 508          $encoding = self::getEncoding($encoding);
 509  
 510          if ('UTF-8' !== $encoding) {
 511              $s = iconv($encoding, 'UTF-8', $s);
 512          }
 513  
 514          $s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
 515  
 516          return ($wide << 1) + iconv_strlen($s, 'UTF-8');
 517      }
 518  
 519      public static function mb_substr_count($haystack, $needle, $encoding = null)
 520      {
 521          return substr_count($haystack, $needle);
 522      }
 523  
 524      public static function mb_output_handler($contents, $status)
 525      {
 526          return $contents;
 527      }
 528  
 529      private static function getSubpart($pos, $part, $haystack, $encoding)
 530      {
 531          if (false === $pos) {
 532              return false;
 533          }
 534          if ($part) {
 535              return self::mb_substr($haystack, 0, $pos, $encoding);
 536          }
 537  
 538          return self::mb_substr($haystack, $pos, null, $encoding);
 539      }
 540  
 541      private static function html_encoding_callback($m)
 542      {
 543          $i = 1;
 544          $entities = '';
 545          $m = unpack('C*', htmlentities($m[0], ENT_COMPAT, 'UTF-8'));
 546  
 547          while (isset($m[$i])) {
 548              if (0x80 > $m[$i]) {
 549                  $entities .= chr($m[$i++]);
 550                  continue;
 551              }
 552              if (0xF0 <= $m[$i]) {
 553                  $c = (($m[$i++] - 0xF0) << 18) + (($m[$i++] - 0x80) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
 554              } elseif (0xE0 <= $m[$i]) {
 555                  $c = (($m[$i++] - 0xE0) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
 556              } else {
 557                  $c = (($m[$i++] - 0xC0) << 6) + $m[$i++] - 0x80;
 558              }
 559  
 560              $entities .= '&#'.$c.';';
 561          }
 562  
 563          return $entities;
 564      }
 565  
 566      private static function title_case_lower($s)
 567      {
 568          return self::mb_convert_case($s[0], MB_CASE_LOWER, 'UTF-8');
 569      }
 570  
 571      private static function title_case_upper($s)
 572      {
 573          return self::mb_convert_case($s[0], MB_CASE_UPPER, 'UTF-8');
 574      }
 575  
 576      private static function getData($file)
 577      {
 578          if (file_exists($file = __DIR__.'/unidata/'.$file.'.ser')) {
 579              return unserialize(file_get_contents($file));
 580          }
 581  
 582          return false;
 583      }
 584  
 585      private static function getEncoding($encoding)
 586      {
 587          if (null === $encoding) {
 588              return self::$internalEncoding;
 589          }
 590  
 591          $encoding = strtoupper($encoding);
 592  
 593          if ('8BIT' === $encoding || 'BINARY' === $encoding) {
 594              return 'CP850';
 595          }
 596          if ('UTF8' === $encoding) {
 597              return 'UTF-8';
 598          }
 599  
 600          return $encoding;
 601      }
 602  }


Generated: Wed Nov 11 20:33:01 2020 Cross-referenced by PHPXref 0.7.1