[ Index ]

PHP Cross Reference of phpBB-3.3.14-deutsch

title

Body

[close]

/vendor/s9e/regexp-builder/src/Input/ -> Utf8.php (source)

   1  <?php declare(strict_types=1);
   2  
   3  /**
   4  * @package   s9e\RegexpBuilder
   5  * @copyright Copyright (c) 2016-2022 The s9e authors
   6  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
   7  */
   8  namespace s9e\RegexpBuilder\Input;
   9  
  10  use InvalidArgumentException;
  11  use function array_map, ord, preg_match_all;
  12  
  /**
  * Input handler that turns a UTF-8 string into a list of Unicode codepoints
  *
  * When the "useSurrogates" option is enabled, every codepoint at or above 0x10000 is emitted
  * as two values (a high surrogate followed by a low surrogate) instead of a single one.
  */
  class Utf8 extends BaseImplementation
  {
      /**
      * @var bool TRUE if codepoints at or above 0x10000 are output as surrogate pairs
      */
      protected $useSurrogates;

      /**
      * {@inheritdoc}
      *
      * Only the "useSurrogates" key is read from $options; any non-empty value enables it.
      */
      public function __construct(array $options = [])
      {
          $this->useSurrogates = isset($options['useSurrogates']) && $options['useSurrogates'];
      }

      /**
      * {@inheritdoc}
      *
      * @throws InvalidArgumentException if preg_match_all() fails on the given string
      */
      public function split(string $string): array
      {
          // The "u" flag makes PCRE treat the subject as UTF-8 and the "s" flag lets the dot
          // match newlines, so each match is exactly one character
          $status = preg_match_all('(.)us', $string, $matches);
          if ($status === false)
          {
              throw new InvalidArgumentException('Invalid UTF-8 string');
          }

          $chars = $matches[0];
          if ($this->useSurrogates)
          {
              return $this->charsToCodepointsWithSurrogates($chars);
          }

          return $this->charsToCodepoints($chars);
      }

      /**
      * Map each UTF-8 character of a list to its Unicode codepoint
      *
      * @param  string[]  $chars List of UTF-8 characters
      * @return integer[]        One codepoint per character, keys preserved
      */
      protected function charsToCodepoints(array $chars): array
      {
          $decoder = [$this, 'cp'];

          return array_map($decoder, $chars);
      }

      /**
      * Map each UTF-8 character of a list to its codepoint, using surrogate pairs where needed
      *
      * Codepoints below 0x10000 are output as-is. Anything higher is output as two consecutive
      * values: a high surrogate then a low surrogate.
      *
      * @param  string[]  $chars List of UTF-8 characters
      * @return integer[]        Flat list of codepoints and surrogates
      */
      protected function charsToCodepointsWithSurrogates(array $chars): array
      {
          $values = [];
          foreach ($chars as $char)
          {
              $codepoint = $this->cp($char);
              if ($codepoint < 0x10000)
              {
                  $values[] = $codepoint;

                  continue;
              }

              // 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded into the base
              $values[] = 0xD7C0 + ($codepoint >> 10);
              $values[] = 0xDC00 + ($codepoint & 0x3FF);
          }

          return $values;
      }

      /**
      * Decode a single UTF-8 character into its Unicode codepoint
      *
      * The sequence length is derived from the first byte only. Each subtracted constant is the
      * sum of the marker bits of the lead and continuation bytes once shifted into position.
      *
      * @param  string  $char UTF-8 char
      * @return integer
      */
      protected function cp(string $char): int
      {
          $lead = ord($char[0]);
          if ($lead < 0xC0)
          {
              // Single byte, the byte value is the codepoint
              return $lead;
          }

          if ($lead < 0xE0)
          {
              // Two bytes: (0xC0 << 6) + 0x80
              return ($lead << 6) + ord($char[1]) - 0x3080;
          }

          if ($lead < 0xF0)
          {
              // Three bytes: (0xE0 << 12) + (0x80 << 6) + 0x80
              return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
          }

          // Four bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
          return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
      }
  }


Generated: Mon Nov 25 19:05:08 2024 Cross-referenced by PHPXref 0.7.1