[ Index ]

PHP Cross Reference of phpBB-3.3.5-deutsch

title

Body

[close]

/vendor/s9e/regexp-builder/src/Input/ -> Utf8.php (source)

   1  <?php declare(strict_types=1);
   2  
   3  /**
   4  * @package   s9e\RegexpBuilder
   5  * @copyright Copyright (c) 2016-2021 The s9e authors
   6  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
   7  */
   8  namespace s9e\RegexpBuilder\Input;
   9  
  10  use InvalidArgumentException;
  11  
  12  class Utf8 extends BaseImplementation
  13  {
  14      /**
  15      * @var bool Whether to use surrogates to represent higher codepoints
  16      */
  17      protected $useSurrogates;
  18  
  19      /**
  20      * {@inheritdoc}
  21      */
  22  	public function __construct(array $options = [])
  23      {
  24          $this->useSurrogates = !empty($options['useSurrogates']);
  25      }
  26  
  27      /**
  28      * {@inheritdoc}
  29      */
  30  	public function split(string $string): array
  31      {
  32          if (preg_match_all('(.)us', $string, $matches) === false)
  33          {
  34              throw new InvalidArgumentException('Invalid UTF-8 string');
  35          }
  36  
  37          return ($this->useSurrogates) ? $this->charsToCodepointsWithSurrogates($matches[0]) : $this->charsToCodepoints($matches[0]);
  38      }
  39  
  40      /**
  41      * Convert a list of UTF-8 characters into a list of Unicode codepoint
  42      *
  43      * @param  string[]  $chars
  44      * @return integer[]
  45      */
  46  	protected function charsToCodepoints(array $chars): array
  47      {
  48          return array_map([$this, 'cp'], $chars);
  49      }
  50  
  51      /**
  52      * Convert a list of UTF-8 characters into a list of Unicode codepoint with surrogates
  53      *
  54      * @param  string[]  $chars
  55      * @return integer[]
  56      */
  57  	protected function charsToCodepointsWithSurrogates(array $chars): array
  58      {
  59          $codepoints = [];
  60          foreach ($chars as $char)
  61          {
  62              $cp = $this->cp($char);
  63              if ($cp < 0x10000)
  64              {
  65                  $codepoints[] = $cp;
  66              }
  67              else
  68              {
  69                  $codepoints[] = 0xD7C0 + ($cp >> 10);
  70                  $codepoints[] = 0xDC00 + ($cp & 0x3FF);
  71              }
  72          }
  73  
  74          return $codepoints;
  75      }
  76  
  77      /**
  78      * Compute and return the Unicode codepoint for given UTF-8 char
  79      *
  80      * @param  string  $char UTF-8 char
  81      * @return integer
  82      */
  83      protected function cp(string $char): int
  84      {
  85          $cp = ord($char[0]);
  86          if ($cp >= 0xF0)
  87          {
  88              $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
  89          }
  90          elseif ($cp >= 0xE0)
  91          {
  92              $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
  93          }
  94          elseif ($cp >= 0xC0)
  95          {
  96              $cp = ($cp << 6) + ord($char[1]) - 0x3080;
  97          }
  98  
  99          return $cp;
 100      }
 101  }


Generated: Mon Oct 4 17:42:11 2021 Cross-referenced by PHPXref 0.7.1