[ Index ]

PHP Cross Reference of phpBB-3.1.12-deutsch

title

Body

[close]

/includes/utf/ -> utf_tools.php (source)

   1  <?php
   2  /**
   3  *
   4  * This file is part of the phpBB Forum Software package.
   5  *
   6  * @copyright (c) phpBB Limited <https://www.phpbb.com>
   7  * @license GNU General Public License, version 2 (GPL-2.0)
   8  *
   9  * For full copyright and license information, please see
  10  * the docs/CREDITS.txt file.
  11  *
  12  */
  13  
  14  /**
  15  */
  16  if (!defined('IN_PHPBB'))
  17  {
  18      exit;
  19  }
  20  
  21  // Enforce ASCII only string handling
  22  setlocale(LC_CTYPE, 'C');
  23  
  24  /**
  25  * UTF-8 tools
  26  *
  27  * Whenever possible, these functions will try to use PHP's built-in functions or
  28  * extensions, otherwise they will default to custom routines.
  29  *
  30  */
  31  
  32  if (!extension_loaded('xml'))
  33  {
  34      /**
  35      * Implementation of PHP's native utf8_encode for people without XML support
  36      * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
  37      *
  38      * @param string $str ISO-8859-1 encoded data
  39      * @return string UTF-8 encoded data
  40      */
  41  	function utf8_encode($str)
  42      {
  43          $out = '';
  44          for ($i = 0, $len = strlen($str); $i < $len; $i++)
  45          {
  46              $letter = $str[$i];
  47              $num = ord($letter);
  48              if ($num < 0x80)
  49              {
  50                  $out .= $letter;
  51              }
  52              else if ($num < 0xC0)
  53              {
  54                  $out .= "\xC2" . $letter;
  55              }
  56              else
  57              {
  58                  $out .= "\xC3" . chr($num - 64);
  59              }
  60          }
  61          return $out;
  62      }
  63  
  64      /**
  65      * Implementation of PHP's native utf8_decode for people without XML support
  66      *
  67      * @param string $str UTF-8 encoded data
  68      * @return string ISO-8859-1 encoded data
  69      */
  70  	function utf8_decode($str)
  71      {
  72          $pos = 0;
  73          $len = strlen($str);
  74          $ret = '';
  75  
  76          while ($pos < $len)
  77          {
  78              $ord = ord($str[$pos]) & 0xF0;
  79              if ($ord === 0xC0 || $ord === 0xD0)
  80              {
  81                  $charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
  82                  $pos += 2;
  83                  $ret .= (($charval < 256) ? chr($charval) : '?');
  84              }
  85              else if ($ord === 0xE0)
  86              {
  87                  $ret .= '?';
  88                  $pos += 3;
  89              }
  90              else if ($ord === 0xF0)
  91              {
  92                  $ret .= '?';
  93                  $pos += 4;
  94              }
  95              else
  96              {
  97                  $ret .= $str[$pos];
  98                  ++$pos;
  99              }
 100          }
 101          return $ret;
 102      }
 103  }
 104  
// mbstring has been around for a long time, so its functions are available even on older versions of PHP.
// If mbstring is not loaded, we fall back to native mode.
 107  if (extension_loaded('mbstring'))
 108  {
 109      mb_internal_encoding('UTF-8');
 110  
 111      /**
 112      * UTF-8 aware alternative to strrpos
 113      * Find position of last occurrence of a char in a string
 114      */
 115      /**
 116      * UTF-8 aware alternative to strrpos
 117      * @ignore
 118      */
 119  	function utf8_strrpos($str,    $needle, $offset = null)
 120      {
 121          // Emulate behaviour of strrpos rather than raising warning
 122          if (empty($str))
 123          {
 124              return false;
 125          }
 126  
 127          if (is_null($offset))
 128          {
 129              return mb_strrpos($str, $needle);
 130          }
 131          else
 132          {
 133              return mb_strrpos($str, $needle, $offset);
 134          }
 135      }
 136  
 137      /**
 138      * UTF-8 aware alternative to strpos
 139      * @ignore
 140      */
 141  	function utf8_strpos($str, $needle, $offset = null)
 142      {
 143          if (is_null($offset))
 144          {
 145              return mb_strpos($str, $needle);
 146          }
 147          else
 148          {
 149              return mb_strpos($str, $needle, $offset);
 150          }
 151      }
 152  
 153      /**
 154      * UTF-8 aware alternative to strtolower
 155      * @ignore
 156      */
 157  	function utf8_strtolower($str)
 158      {
 159          return mb_strtolower($str);
 160      }
 161  
 162      /**
 163      * UTF-8 aware alternative to strtoupper
 164      * @ignore
 165      */
 166  	function utf8_strtoupper($str)
 167      {
 168          return mb_strtoupper($str);
 169      }
 170  
 171      /**
 172      * UTF-8 aware alternative to substr
 173      * @ignore
 174      */
 175  	function utf8_substr($str, $offset, $length = null)
 176      {
 177          if (is_null($length))
 178          {
 179              return mb_substr($str, $offset);
 180          }
 181          else
 182          {
 183              return mb_substr($str, $offset, $length);
 184          }
 185      }
 186  
 187      /**
 188      * Return the length (in characters) of a UTF-8 string
 189      * @ignore
 190      */
 191  	function utf8_strlen($text)
 192      {
 193          return mb_strlen($text, 'utf-8');
 194      }
 195  }
 196  else
 197  {
 198      /**
 199      * UTF-8 aware alternative to strrpos
 200      * Find position of last occurrence of a char in a string
 201      *
 202      * @author Harry Fuecks
 203      * @param string $str haystack
 204      * @param string $needle needle
 205      * @param integer $offset (optional) offset (from left)
 206      * @return mixed integer position or FALSE on failure
 207      */
 208  	function utf8_strrpos($str,    $needle, $offset = null)
 209      {
 210          if (is_null($offset))
 211          {
 212              $ar    = explode($needle, $str);
 213  
 214              if (sizeof($ar) > 1)
 215              {
 216                  // Pop off the end of the string where the last    match was made
 217                  array_pop($ar);
 218                  $str = join($needle, $ar);
 219  
 220                  return utf8_strlen($str);
 221              }
 222              return false;
 223          }
 224          else
 225          {
 226              if (!is_int($offset))
 227              {
 228                  trigger_error('utf8_strrpos    expects    parameter 3    to be long', E_USER_ERROR);
 229                  return false;
 230              }
 231  
 232              $str = utf8_substr($str, $offset);
 233  
 234              if (false !== ($pos = utf8_strrpos($str, $needle)))
 235              {
 236                  return $pos    + $offset;
 237              }
 238  
 239              return false;
 240          }
 241      }
 242  
 243      /**
 244      * UTF-8 aware alternative to strpos
 245      * Find position of first occurrence of a string
 246      *
 247      * @author Harry Fuecks
 248      * @param string $str haystack
 249      * @param string $needle needle
 250      * @param integer $offset offset in characters (from left)
 251      * @return mixed integer position or FALSE on failure
 252      */
 253  	function utf8_strpos($str, $needle, $offset = null)
 254      {
 255          if (is_null($offset))
 256          {
 257              $ar = explode($needle, $str);
 258              if (sizeof($ar) > 1)
 259              {
 260                  return utf8_strlen($ar[0]);
 261              }
 262              return false;
 263          }
 264          else
 265          {
 266              if (!is_int($offset))
 267              {
 268                  trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
 269                  return false;
 270              }
 271  
 272              $str = utf8_substr($str, $offset);
 273  
 274              if (false !== ($pos = utf8_strpos($str, $needle)))
 275              {
 276                  return $pos + $offset;
 277              }
 278  
 279              return false;
 280          }
 281      }
 282  
 283      /**
 284      * UTF-8 aware alternative to strtolower
 285      * Make a string lowercase
 286      * Note: The concept of a characters "case" only exists is some alphabets
 287      * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 288      * not exist in the Chinese alphabet, for example. See Unicode Standard
 289      * Annex #21: Case Mappings
 290      *
 291      * @param string
 292      * @return string string in lowercase
 293      */
 294  	function utf8_strtolower($string)
 295      {
 296          static $utf8_upper_to_lower = array(
 297              "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
 298              "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
 299              "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
 300              "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
 301              "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
 302              "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
 303              "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
 304              "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
 305              "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
 306              "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
 307              "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
 308              "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
 309              "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
 310              "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
 311              "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
 312              "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
 313              "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
 314              "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
 315              "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
 316              "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
 317              "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
 318              "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
 319              "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
 320              "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
 321              "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
 322              "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
 323              "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
 324              "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
 325              "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
 326              "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
 327              "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
 328              "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
 329              "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
 330              "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
 331              "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
 332              "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
 333              "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
 334              "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
 335              "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
 336              "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
 337              "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
 338              "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
 339              "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
 340              "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
 341              "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
 342              "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
 343          );
 344  
 345          return strtr(strtolower($string), $utf8_upper_to_lower);
 346      }
 347  
 348      /**
 349      * UTF-8 aware alternative to strtoupper
 350      * Make a string uppercase
 351      * Note: The concept of a characters "case" only exists is some alphabets
 352      * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 353      * not exist in the Chinese alphabet, for example. See Unicode Standard
 354      * Annex #21: Case Mappings
 355      *
 356      * @param string
 357      * @return string string in uppercase
 358      */
 359  	function utf8_strtoupper($string)
 360      {
 361          static $utf8_lower_to_upper = array(
 362              "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
 363              "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
 364              "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
 365              "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
 366              "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
 367              "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
 368              "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
 369              "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
 370              "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
 371              "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
 372              "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
 373              "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
 374              "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
 375              "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
 376              "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
 377              "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
 378              "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
 379              "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
 380              "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
 381              "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
 382              "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
 383              "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
 384              "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
 385              "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
 386              "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
 387              "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
 388              "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
 389              "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
 390              "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
 391              "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
 392              "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
 393              "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
 394              "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
 395              "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
 396              "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
 397              "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
 398              "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
 399              "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
 400              "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
 401              "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
 402              "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
 403              "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
 404              "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
 405              "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
 406              "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
 407              "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
 408          );
 409  
 410          return strtr(strtoupper($string), $utf8_lower_to_upper);
 411      }
 412  
 413      /**
 414      * UTF-8 aware alternative to substr
 415      * Return part of a string given character offset (and optionally length)
 416      *
 417      * Note arguments: comparied to substr - if offset or length are
 418      * not integers, this version will not complain but rather massages them
 419      * into an integer.
 420      *
 421      * Note on returned values: substr documentation states false can be
 422      * returned in some cases (e.g. offset > string length)
 423      * mb_substr never returns false, it will return an empty string instead.
 424      * This adopts the mb_substr approach
 425      *
 426      * Note on implementation: PCRE only supports repetitions of less than
 427      * 65536, in order to accept up to MAXINT values for offset and length,
 428      * we'll repeat a group of 65535 characters when needed.
 429      *
 430      * Note on implementation: calculating the number of characters in the
 431      * string is a relatively expensive operation, so we only carry it out when
 432      * necessary. It isn't necessary for +ve offsets and no specified length
 433      *
 434      * @author Chris Smith<chris@jalakai.co.uk>
 435      * @param string $str
 436      * @param integer $offset number of UTF-8 characters offset (from left)
 437      * @param integer $length (optional) length in UTF-8 characters from offset
 438      * @return mixed string or FALSE if failure
 439      */
 440  	function utf8_substr($str, $offset, $length = NULL)
 441      {
 442          // generates E_NOTICE
 443          // for PHP4 objects, but not PHP5 objects
 444          $str = (string) $str;
 445          $offset = (int) $offset;
 446          if (!is_null($length))
 447          {
 448              $length = (int) $length;
 449          }
 450  
 451          // handle trivial cases
 452          if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
 453          {
 454              return '';
 455          }
 456  
 457          // normalise negative offsets (we could use a tail
 458          // anchored pattern, but they are horribly slow!)
 459          if ($offset < 0)
 460          {
 461              // see notes
 462              $strlen = utf8_strlen($str);
 463              $offset = $strlen + $offset;
 464              if ($offset < 0)
 465              {
 466                  $offset = 0;
 467              }
 468          }
 469  
 470          $op = '';
 471          $lp = '';
 472  
 473          // establish a pattern for offset, a
 474          // non-captured group equal in length to offset
 475          if ($offset > 0)
 476          {
 477              $ox = (int) ($offset / 65535);
 478              $oy = $offset % 65535;
 479  
 480              if ($ox)
 481              {
 482                  $op = '(?:.{65535}){' . $ox . '}';
 483              }
 484  
 485              $op = '^(?:' . $op . '.{' . $oy . '})';
 486          }
 487          else
 488          {
 489              // offset == 0; just anchor the pattern
 490              $op = '^';
 491          }
 492  
 493          // establish a pattern for length
 494          if (is_null($length))
 495          {
 496              // the rest of the string
 497              $lp = '(.*)$';
 498          }
 499          else
 500          {
 501              if (!isset($strlen))
 502              {
 503                  // see notes
 504                  $strlen = utf8_strlen($str);
 505              }
 506  
 507              // another trivial case
 508              if ($offset > $strlen)
 509              {
 510                  return '';
 511              }
 512  
 513              if ($length > 0)
 514              {
 515                  // reduce any length that would
 516                  // go passed the end of the string
 517                  $length = min($strlen - $offset, $length);
 518  
 519                  $lx = (int) ($length / 65535);
 520                  $ly = $length % 65535;
 521  
 522                  // negative length requires a captured group
 523                  // of length characters
 524                  if ($lx)
 525                  {
 526                      $lp = '(?:.{65535}){' . $lx . '}';
 527                  }
 528                  $lp = '(' . $lp . '.{'. $ly . '})';
 529              }
 530              else if ($length < 0)
 531              {
 532                  if ($length < ($offset - $strlen))
 533                  {
 534                      return '';
 535                  }
 536  
 537                  $lx = (int) ((-$length) / 65535);
 538                  $ly = (-$length) % 65535;
 539  
 540                  // negative length requires ... capture everything
 541                  // except a group of  -length characters
 542                  // anchored at the tail-end of the string
 543                  if ($lx)
 544                  {
 545                      $lp = '(?:.{65535}){' . $lx . '}';
 546                  }
 547                  $lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
 548              }
 549          }
 550  
 551          if (!preg_match('#' . $op . $lp . '#us', $str, $match))
 552          {
 553              return '';
 554          }
 555  
 556          return $match[1];
 557      }
 558  
 559      /**
 560      * Return the length (in characters) of a UTF-8 string
 561      *
 562      * @param    string    $text        UTF-8 string
 563      * @return    integer                Length (in chars) of given string
 564      */
 565  	function utf8_strlen($text)
 566      {
 567          // Since utf8_decode is replacing multibyte characters to ? strlen works fine
 568          return strlen(utf8_decode($text));
 569      }
 570  }
 571  
 572  /**
 573  * UTF-8 aware alternative to str_split
 574  * Convert a string to an array
 575  *
 576  * @author Harry Fuecks
 577  * @param string $str UTF-8 encoded
 578  * @param int $split_len number to characters to split string by
 579  * @return array characters in string reverses
 580  */
 581  function utf8_str_split($str, $split_len = 1)
 582  {
 583      if (!is_int($split_len) || $split_len < 1)
 584      {
 585          return false;
 586      }
 587  
 588      $len = utf8_strlen($str);
 589      if ($len <= $split_len)
 590      {
 591          return array($str);
 592      }
 593  
 594      preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);
 595      return $ar[0];
 596  }
 597  
 598  /**
 599  * UTF-8 aware alternative to strspn
 600  * Find length of initial segment matching the mask
 601  *
 602  * @author Harry Fuecks
 603  */
 604  function utf8_strspn($str, $mask, $start = null, $length = null)
 605  {
 606      if ($start !== null || $length !== null)
 607      {
 608          $str = utf8_substr($str, $start, $length);
 609      }
 610  
 611      preg_match('/^[' . $mask . ']+/u', $str, $matches);
 612  
 613      if (isset($matches[0]))
 614      {
 615          return utf8_strlen($matches[0]);
 616      }
 617  
 618      return 0;
 619  }
 620  
 621  /**
 622  * UTF-8 aware alternative to ucfirst
 623  * Make a string's first character uppercase
 624  *
 625  * @author Harry Fuecks
 626  * @param string
 627  * @return string with first character as upper case (if applicable)
 628  */
 629  function utf8_ucfirst($str)
 630  {
 631      switch (utf8_strlen($str))
 632      {
 633          case 0:
 634              return '';
 635          break;
 636  
 637          case 1:
 638              return utf8_strtoupper($str);
 639          break;
 640  
 641          default:
 642              preg_match('/^(.{1})(.*)$/us', $str, $matches);
 643              return utf8_strtoupper($matches[1]) . $matches[2];
 644          break;
 645      }
 646  }
 647  
 648  /**
 649  * Recode a string to UTF-8
 650  *
 651  * If the encoding is not supported, the string is returned as-is
 652  *
 653  * @param    string    $string        Original string
 654  * @param    string    $encoding    Original encoding (lowered)
 655  * @return    string                The string, encoded in UTF-8
 656  */
 657  function utf8_recode($string, $encoding)
 658  {
 659      $encoding = strtolower($encoding);
 660  
 661      if ($encoding == 'utf-8' || !is_string($string) || empty($string))
 662      {
 663          return $string;
 664      }
 665  
 666      // we force iso-8859-1 to be cp1252
 667      if ($encoding == 'iso-8859-1')
 668      {
 669          $encoding = 'cp1252';
 670      }
 671      // convert iso-8859-8-i to iso-8859-8
 672      else if ($encoding == 'iso-8859-8-i')
 673      {
 674          $encoding = 'iso-8859-8';
 675          $string = hebrev($string);
 676      }
 677  
 678      // First, try iconv()
 679      if (function_exists('iconv'))
 680      {
 681          $ret = @iconv($encoding, 'utf-8', $string);
 682  
 683          if (!empty($ret))
 684          {
 685              return $ret;
 686          }
 687      }
 688  
 689      // Try the mb_string extension
 690      if (function_exists('mb_convert_encoding'))
 691      {
 692          // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
 693          switch ($encoding)
 694          {
 695              case 'iso-8859-1':
 696              case 'iso-8859-2':
 697              case 'iso-8859-4':
 698              case 'iso-8859-7':
 699              case 'iso-8859-9':
 700              case 'iso-8859-15':
 701              case 'windows-1251':
 702              case 'windows-1252':
 703              case 'cp1252':
 704              case 'shift_jis':
 705              case 'euc-kr':
 706              case 'big5':
 707              case 'gb2312':
 708                  $ret = @mb_convert_encoding($string, 'utf-8', $encoding);
 709  
 710                  if (!empty($ret))
 711                  {
 712                      return $ret;
 713                  }
 714          }
 715      }
 716  
 717      // Try the recode extension
 718      if (function_exists('recode_string'))
 719      {
 720          $ret = @recode_string($encoding . '..utf-8', $string);
 721  
 722          if (!empty($ret))
 723          {
 724              return $ret;
 725          }
 726      }
 727  
 728      // If nothing works, check if we have a custom transcoder available
 729      if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
 730      {
 731          // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
 732          trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 733      }
 734  
 735      global $phpbb_root_path, $phpEx;
 736  
 737      // iso-8859-* character encoding
 738      if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
 739      {
 740          switch ($array[1])
 741          {
 742              case '1':
 743              case '2':
 744              case '4':
 745              case '7':
 746              case '8':
 747              case '9':
 748              case '15':
 749                  if (!function_exists('iso_8859_' . $array[1]))
 750                  {
 751                      if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
 752                      {
 753                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
 754                      }
 755                      include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
 756                  }
 757                  return call_user_func('iso_8859_' . $array[1], $string);
 758              break;
 759  
 760              default:
 761                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 762              break;
 763          }
 764      }
 765  
 766      // CP/WIN character encoding
 767      if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
 768      {
 769          switch ($array[1])
 770          {
 771              case '932':
 772              break;
 773              case '1250':
 774              case '1251':
 775              case '1252':
 776              case '1254':
 777              case '1255':
 778              case '1256':
 779              case '1257':
 780              case '874':
 781                  if (!function_exists('cp' . $array[1]))
 782                  {
 783                      if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
 784                      {
 785                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
 786                      }
 787                      include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
 788                  }
 789                  return call_user_func('cp' . $array[1], $string);
 790              break;
 791  
 792              default:
 793                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 794              break;
 795          }
 796      }
 797  
 798      // TIS-620
 799      if (preg_match('/tis[_ -]?620/', $encoding))
 800      {
 801          if (!function_exists('tis_620'))
 802          {
 803              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
 804              {
 805                  trigger_error('Basic reencoder file is missing', E_USER_ERROR);
 806              }
 807              include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
 808          }
 809          return tis_620($string);
 810      }
 811  
 812      // SJIS
 813      if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
 814      {
 815          if (!function_exists('sjis'))
 816          {
 817              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
 818              {
 819                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
 820              }
 821              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
 822          }
 823          return sjis($string);
 824      }
 825  
 826      // EUC_KR
 827      if (preg_match('/euc[_ -]?kr/', $encoding))
 828      {
 829          if (!function_exists('euc_kr'))
 830          {
 831              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
 832              {
 833                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
 834              }
 835              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
 836          }
 837          return euc_kr($string);
 838      }
 839  
 840      // BIG-5
 841      if (preg_match('/big[_ -]?5/', $encoding))
 842      {
 843          if (!function_exists('big5'))
 844          {
 845              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
 846              {
 847                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
 848              }
 849              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
 850          }
 851          return big5($string);
 852      }
 853  
 854      // GB2312
 855      if (preg_match('/gb[_ -]?2312/', $encoding))
 856      {
 857          if (!function_exists('gb2312'))
 858          {
 859              if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
 860              {
 861                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
 862              }
 863              include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
 864          }
 865          return gb2312($string);
 866      }
 867  
    // No transcoder matched the encoding: raise an error instead of returning unconverted data
 869      trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 870      //return $string; // use utf_normalizer::cleanup() ?
 871  }
 872  
 873  /**
 874  * Replace all UTF-8 chars that are not in ASCII with their NCR
 875  *
 876  * @param    string    $text        UTF-8 string in NFC
 877  * @return    string                ASCII string using NCRs for non-ASCII chars
 878  */
 879  function utf8_encode_ncr($text)
 880  {
 881      return preg_replace_callback('#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#', 'utf8_encode_ncr_callback', $text);
 882  }
 883  
 884  /**
 885  * Callback used in encode_ncr()
 886  *
 887  * Takes a UTF-8 char and replaces it with its NCR. Attention, $m is an array
 888  *
 889  * @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
 890  * @return    string                A HTML NCR if the character is valid, or the original string otherwise
 891  */
 892  function utf8_encode_ncr_callback($m)
 893  {
 894      return '&#' . utf8_ord($m[0]) . ';';
 895  }
 896  
 897  /**
 898  * Converts a UTF-8 char to an NCR
 899  *
 900  * @param string $chr UTF-8 char
 901  * @return integer UNICODE code point
 902  */
 903  function utf8_ord($chr)
 904  {
 905      switch (strlen($chr))
 906      {
 907          case 1:
 908              return ord($chr);
 909          break;
 910  
 911          case 2:
 912              return ((ord($chr[0]) & 0x1F) << 6) | (ord($chr[1]) & 0x3F);
 913          break;
 914  
 915          case 3:
 916              return ((ord($chr[0]) & 0x0F) << 12) | ((ord($chr[1]) & 0x3F) << 6) | (ord($chr[2]) & 0x3F);
 917          break;
 918  
 919          case 4:
 920              return ((ord($chr[0]) & 0x07) << 18) | ((ord($chr[1]) & 0x3F) << 12) | ((ord($chr[2]) & 0x3F) << 6) | (ord($chr[3]) & 0x3F);
 921          break;
 922  
 923          default:
 924              return $chr;
 925      }
 926  }
 927  
 928  /**
 929  * Converts an NCR to a UTF-8 char
 930  *
 931  * @param    int        $cp    UNICODE code point
 932  * @return    string        UTF-8 char
 933  */
 934  function utf8_chr($cp)
 935  {
 936      if ($cp > 0xFFFF)
 937      {
 938          return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 939      }
 940      else if ($cp > 0x7FF)
 941      {
 942          return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 943      }
 944      else if ($cp > 0x7F)
 945      {
 946          return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
 947      }
 948      else
 949      {
 950          return chr($cp);
 951      }
 952  }
 953  
 954  /**
 955  * Convert Numeric Character References to UTF-8 chars
 956  *
 957  * Notes:
 958  *    - we do not convert NCRs recursively, if you pass &#38;#38; it will return &#38;
 959  *    - we DO NOT check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
 960  *
 961  * @param    string    $text        String to convert, encoded in UTF-8 (no normal form required)
 962  * @return    string                UTF-8 string where NCRs have been replaced with the actual chars
 963  */
 964  function utf8_decode_ncr($text)
 965  {
 966      return preg_replace_callback('/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i', 'utf8_decode_ncr_callback', $text);
 967  }
 968  
 969  /**
 970  * Callback used in decode_ncr()
 971  *
 972  * Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
 973  * It will ignore most of invalid NCRs, but not all!
 974  *
 975  * @param    array    $m            0-based numerically indexed array passed by preg_replace_callback()
 976  * @return    string                UTF-8 char
 977  */
 978  function utf8_decode_ncr_callback($m)
 979  {
 980      $cp = (strncasecmp($m[1], 'x', 1)) ? $m[1] : hexdec(substr($m[1], 1));
 981  
 982      return utf8_chr($cp);
 983  }
 984  
 985  /**
 986  * Case folds a unicode string as per Unicode 5.0, section 3.13
 987  *
 988  * @param    string    $text    text to be case folded
 989  * @param    string    $option    determines how we will fold the cases
 990  * @return    string            case folded text
 991  */
 992  function utf8_case_fold($text, $option = 'full')
 993  {
 994      static $uniarray = array();
 995      global $phpbb_root_path, $phpEx;
 996  
 997      // common is always set
 998      if (!isset($uniarray['c']))
 999      {
1000          $uniarray['c'] = include($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
1001      }
1002  
1003      // only set full if we need to
1004      if ($option === 'full' && !isset($uniarray['f']))
1005      {
1006          $uniarray['f'] = include($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
1007      }
1008  
1009      // only set simple if we need to
1010      if ($option !== 'full' && !isset($uniarray['s']))
1011      {
1012          $uniarray['s'] = include($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
1013      }
1014  
1015      // common is always replaced
1016      $text = strtr($text, $uniarray['c']);
1017  
1018      if ($option === 'full')
1019      {
1020          // full replaces a character with multiple characters
1021          $text = strtr($text, $uniarray['f']);
1022      }
1023      else
1024      {
1025          // simple replaces a character with another character
1026          $text = strtr($text, $uniarray['s']);
1027      }
1028  
1029      return $text;
1030  }
1031  
1032  /**
1033  * Takes the input and does a "special" case fold. It does minor normalization
* and returns NFKC compatible text
1035  *
1036  * @param    string    $text    text to be case folded
1037  * @param    string    $option    determines how we will fold the cases
1038  * @return    string            case folded text
1039  */
1040  function utf8_case_fold_nfkc($text, $option = 'full')
1041  {
1042      static $fc_nfkc_closure = array(
1043          "\xCD\xBA"    => "\x20\xCE\xB9",
1044          "\xCF\x92"    => "\xCF\x85",
1045          "\xCF\x93"    => "\xCF\x8D",
1046          "\xCF\x94"    => "\xCF\x8B",
1047          "\xCF\xB2"    => "\xCF\x83",
1048          "\xCF\xB9"    => "\xCF\x83",
1049          "\xE1\xB4\xAC"    => "\x61",
1050          "\xE1\xB4\xAD"    => "\xC3\xA6",
1051          "\xE1\xB4\xAE"    => "\x62",
1052          "\xE1\xB4\xB0"    => "\x64",
1053          "\xE1\xB4\xB1"    => "\x65",
1054          "\xE1\xB4\xB2"    => "\xC7\x9D",
1055          "\xE1\xB4\xB3"    => "\x67",
1056          "\xE1\xB4\xB4"    => "\x68",
1057          "\xE1\xB4\xB5"    => "\x69",
1058          "\xE1\xB4\xB6"    => "\x6A",
1059          "\xE1\xB4\xB7"    => "\x6B",
1060          "\xE1\xB4\xB8"    => "\x6C",
1061          "\xE1\xB4\xB9"    => "\x6D",
1062          "\xE1\xB4\xBA"    => "\x6E",
1063          "\xE1\xB4\xBC"    => "\x6F",
1064          "\xE1\xB4\xBD"    => "\xC8\xA3",
1065          "\xE1\xB4\xBE"    => "\x70",
1066          "\xE1\xB4\xBF"    => "\x72",
1067          "\xE1\xB5\x80"    => "\x74",
1068          "\xE1\xB5\x81"    => "\x75",
1069          "\xE1\xB5\x82"    => "\x77",
1070          "\xE2\x82\xA8"    => "\x72\x73",
1071          "\xE2\x84\x82"    => "\x63",
1072          "\xE2\x84\x83"    => "\xC2\xB0\x63",
1073          "\xE2\x84\x87"    => "\xC9\x9B",
1074          "\xE2\x84\x89"    => "\xC2\xB0\x66",
1075          "\xE2\x84\x8B"    => "\x68",
1076          "\xE2\x84\x8C"    => "\x68",
1077          "\xE2\x84\x8D"    => "\x68",
1078          "\xE2\x84\x90"    => "\x69",
1079          "\xE2\x84\x91"    => "\x69",
1080          "\xE2\x84\x92"    => "\x6C",
1081          "\xE2\x84\x95"    => "\x6E",
1082          "\xE2\x84\x96"    => "\x6E\x6F",
1083          "\xE2\x84\x99"    => "\x70",
1084          "\xE2\x84\x9A"    => "\x71",
1085          "\xE2\x84\x9B"    => "\x72",
1086          "\xE2\x84\x9C"    => "\x72",
1087          "\xE2\x84\x9D"    => "\x72",
1088          "\xE2\x84\xA0"    => "\x73\x6D",
1089          "\xE2\x84\xA1"    => "\x74\x65\x6C",
1090          "\xE2\x84\xA2"    => "\x74\x6D",
1091          "\xE2\x84\xA4"    => "\x7A",
1092          "\xE2\x84\xA8"    => "\x7A",
1093          "\xE2\x84\xAC"    => "\x62",
1094          "\xE2\x84\xAD"    => "\x63",
1095          "\xE2\x84\xB0"    => "\x65",
1096          "\xE2\x84\xB1"    => "\x66",
1097          "\xE2\x84\xB3"    => "\x6D",
1098          "\xE2\x84\xBB"    => "\x66\x61\x78",
1099          "\xE2\x84\xBE"    => "\xCE\xB3",
1100          "\xE2\x84\xBF"    => "\xCF\x80",
1101          "\xE2\x85\x85"    => "\x64",
1102          "\xE3\x89\x90"    => "\x70\x74\x65",
1103          "\xE3\x8B\x8C"    => "\x68\x67",
1104          "\xE3\x8B\x8E"    => "\x65\x76",
1105          "\xE3\x8B\x8F"    => "\x6C\x74\x64",
1106          "\xE3\x8D\xB1"    => "\x68\x70\x61",
1107          "\xE3\x8D\xB3"    => "\x61\x75",
1108          "\xE3\x8D\xB5"    => "\x6F\x76",
1109          "\xE3\x8D\xBA"    => "\x69\x75",
1110          "\xE3\x8E\x80"    => "\x70\x61",
1111          "\xE3\x8E\x81"    => "\x6E\x61",
1112          "\xE3\x8E\x82"    => "\xCE\xBC\x61",
1113          "\xE3\x8E\x83"    => "\x6D\x61",
1114          "\xE3\x8E\x84"    => "\x6B\x61",
1115          "\xE3\x8E\x85"    => "\x6B\x62",
1116          "\xE3\x8E\x86"    => "\x6D\x62",
1117          "\xE3\x8E\x87"    => "\x67\x62",
1118          "\xE3\x8E\x8A"    => "\x70\x66",
1119          "\xE3\x8E\x8B"    => "\x6E\x66",
1120          "\xE3\x8E\x8C"    => "\xCE\xBC\x66",
1121          "\xE3\x8E\x90"    => "\x68\x7A",
1122          "\xE3\x8E\x91"    => "\x6B\x68\x7A",
1123          "\xE3\x8E\x92"    => "\x6D\x68\x7A",
1124          "\xE3\x8E\x93"    => "\x67\x68\x7A",
1125          "\xE3\x8E\x94"    => "\x74\x68\x7A",
1126          "\xE3\x8E\xA9"    => "\x70\x61",
1127          "\xE3\x8E\xAA"    => "\x6B\x70\x61",
1128          "\xE3\x8E\xAB"    => "\x6D\x70\x61",
1129          "\xE3\x8E\xAC"    => "\x67\x70\x61",
1130          "\xE3\x8E\xB4"    => "\x70\x76",
1131          "\xE3\x8E\xB5"    => "\x6E\x76",
1132          "\xE3\x8E\xB6"    => "\xCE\xBC\x76",
1133          "\xE3\x8E\xB7"    => "\x6D\x76",
1134          "\xE3\x8E\xB8"    => "\x6B\x76",
1135          "\xE3\x8E\xB9"    => "\x6D\x76",
1136          "\xE3\x8E\xBA"    => "\x70\x77",
1137          "\xE3\x8E\xBB"    => "\x6E\x77",
1138          "\xE3\x8E\xBC"    => "\xCE\xBC\x77",
1139          "\xE3\x8E\xBD"    => "\x6D\x77",
1140          "\xE3\x8E\xBE"    => "\x6B\x77",
1141          "\xE3\x8E\xBF"    => "\x6D\x77",
1142          "\xE3\x8F\x80"    => "\x6B\xCF\x89",
1143          "\xE3\x8F\x81"    => "\x6D\xCF\x89",
1144          "\xE3\x8F\x83"    => "\x62\x71",
1145          "\xE3\x8F\x86"    => "\x63\xE2\x88\x95\x6B\x67",
1146          "\xE3\x8F\x87"    => "\x63\x6F\x2E",
1147          "\xE3\x8F\x88"    => "\x64\x62",
1148          "\xE3\x8F\x89"    => "\x67\x79",
1149          "\xE3\x8F\x8B"    => "\x68\x70",
1150          "\xE3\x8F\x8D"    => "\x6B\x6B",
1151          "\xE3\x8F\x8E"    => "\x6B\x6D",
1152          "\xE3\x8F\x97"    => "\x70\x68",
1153          "\xE3\x8F\x99"    => "\x70\x70\x6D",
1154          "\xE3\x8F\x9A"    => "\x70\x72",
1155          "\xE3\x8F\x9C"    => "\x73\x76",
1156          "\xE3\x8F\x9D"    => "\x77\x62",
1157          "\xE3\x8F\x9E"    => "\x76\xE2\x88\x95\x6D",
1158          "\xE3\x8F\x9F"    => "\x61\xE2\x88\x95\x6D",
1159          "\xF0\x9D\x90\x80"    => "\x61",
1160          "\xF0\x9D\x90\x81"    => "\x62",
1161          "\xF0\x9D\x90\x82"    => "\x63",
1162          "\xF0\x9D\x90\x83"    => "\x64",
1163          "\xF0\x9D\x90\x84"    => "\x65",
1164          "\xF0\x9D\x90\x85"    => "\x66",
1165          "\xF0\x9D\x90\x86"    => "\x67",
1166          "\xF0\x9D\x90\x87"    => "\x68",
1167          "\xF0\x9D\x90\x88"    => "\x69",
1168          "\xF0\x9D\x90\x89"    => "\x6A",
1169          "\xF0\x9D\x90\x8A"    => "\x6B",
1170          "\xF0\x9D\x90\x8B"    => "\x6C",
1171          "\xF0\x9D\x90\x8C"    => "\x6D",
1172          "\xF0\x9D\x90\x8D"    => "\x6E",
1173          "\xF0\x9D\x90\x8E"    => "\x6F",
1174          "\xF0\x9D\x90\x8F"    => "\x70",
1175          "\xF0\x9D\x90\x90"    => "\x71",
1176          "\xF0\x9D\x90\x91"    => "\x72",
1177          "\xF0\x9D\x90\x92"    => "\x73",
1178          "\xF0\x9D\x90\x93"    => "\x74",
1179          "\xF0\x9D\x90\x94"    => "\x75",
1180          "\xF0\x9D\x90\x95"    => "\x76",
1181          "\xF0\x9D\x90\x96"    => "\x77",
1182          "\xF0\x9D\x90\x97"    => "\x78",
1183          "\xF0\x9D\x90\x98"    => "\x79",
1184          "\xF0\x9D\x90\x99"    => "\x7A",
1185          "\xF0\x9D\x90\xB4"    => "\x61",
1186          "\xF0\x9D\x90\xB5"    => "\x62",
1187          "\xF0\x9D\x90\xB6"    => "\x63",
1188          "\xF0\x9D\x90\xB7"    => "\x64",
1189          "\xF0\x9D\x90\xB8"    => "\x65",
1190          "\xF0\x9D\x90\xB9"    => "\x66",
1191          "\xF0\x9D\x90\xBA"    => "\x67",
1192          "\xF0\x9D\x90\xBB"    => "\x68",
1193          "\xF0\x9D\x90\xBC"    => "\x69",
1194          "\xF0\x9D\x90\xBD"    => "\x6A",
1195          "\xF0\x9D\x90\xBE"    => "\x6B",
1196          "\xF0\x9D\x90\xBF"    => "\x6C",
1197          "\xF0\x9D\x91\x80"    => "\x6D",
1198          "\xF0\x9D\x91\x81"    => "\x6E",
1199          "\xF0\x9D\x91\x82"    => "\x6F",
1200          "\xF0\x9D\x91\x83"    => "\x70",
1201          "\xF0\x9D\x91\x84"    => "\x71",
1202          "\xF0\x9D\x91\x85"    => "\x72",
1203          "\xF0\x9D\x91\x86"    => "\x73",
1204          "\xF0\x9D\x91\x87"    => "\x74",
1205          "\xF0\x9D\x91\x88"    => "\x75",
1206          "\xF0\x9D\x91\x89"    => "\x76",
1207          "\xF0\x9D\x91\x8A"    => "\x77",
1208          "\xF0\x9D\x91\x8B"    => "\x78",
1209          "\xF0\x9D\x91\x8C"    => "\x79",
1210          "\xF0\x9D\x91\x8D"    => "\x7A",
1211          "\xF0\x9D\x91\xA8"    => "\x61",
1212          "\xF0\x9D\x91\xA9"    => "\x62",
1213          "\xF0\x9D\x91\xAA"    => "\x63",
1214          "\xF0\x9D\x91\xAB"    => "\x64",
1215          "\xF0\x9D\x91\xAC"    => "\x65",
1216          "\xF0\x9D\x91\xAD"    => "\x66",
1217          "\xF0\x9D\x91\xAE"    => "\x67",
1218          "\xF0\x9D\x91\xAF"    => "\x68",
1219          "\xF0\x9D\x91\xB0"    => "\x69",
1220          "\xF0\x9D\x91\xB1"    => "\x6A",
1221          "\xF0\x9D\x91\xB2"    => "\x6B",
1222          "\xF0\x9D\x91\xB3"    => "\x6C",
1223          "\xF0\x9D\x91\xB4"    => "\x6D",
1224          "\xF0\x9D\x91\xB5"    => "\x6E",
1225          "\xF0\x9D\x91\xB6"    => "\x6F",
1226          "\xF0\x9D\x91\xB7"    => "\x70",
1227          "\xF0\x9D\x91\xB8"    => "\x71",
1228          "\xF0\x9D\x91\xB9"    => "\x72",
1229          "\xF0\x9D\x91\xBA"    => "\x73",
1230          "\xF0\x9D\x91\xBB"    => "\x74",
1231          "\xF0\x9D\x91\xBC"    => "\x75",
1232          "\xF0\x9D\x91\xBD"    => "\x76",
1233          "\xF0\x9D\x91\xBE"    => "\x77",
1234          "\xF0\x9D\x91\xBF"    => "\x78",
1235          "\xF0\x9D\x92\x80"    => "\x79",
1236          "\xF0\x9D\x92\x81"    => "\x7A",
1237          "\xF0\x9D\x92\x9C"    => "\x61",
1238          "\xF0\x9D\x92\x9E"    => "\x63",
1239          "\xF0\x9D\x92\x9F"    => "\x64",
1240          "\xF0\x9D\x92\xA2"    => "\x67",
1241          "\xF0\x9D\x92\xA5"    => "\x6A",
1242          "\xF0\x9D\x92\xA6"    => "\x6B",
1243          "\xF0\x9D\x92\xA9"    => "\x6E",
1244          "\xF0\x9D\x92\xAA"    => "\x6F",
1245          "\xF0\x9D\x92\xAB"    => "\x70",
1246          "\xF0\x9D\x92\xAC"    => "\x71",
1247          "\xF0\x9D\x92\xAE"    => "\x73",
1248          "\xF0\x9D\x92\xAF"    => "\x74",
1249          "\xF0\x9D\x92\xB0"    => "\x75",
1250          "\xF0\x9D\x92\xB1"    => "\x76",
1251          "\xF0\x9D\x92\xB2"    => "\x77",
1252          "\xF0\x9D\x92\xB3"    => "\x78",
1253          "\xF0\x9D\x92\xB4"    => "\x79",
1254          "\xF0\x9D\x92\xB5"    => "\x7A",
1255          "\xF0\x9D\x93\x90"    => "\x61",
1256          "\xF0\x9D\x93\x91"    => "\x62",
1257          "\xF0\x9D\x93\x92"    => "\x63",
1258          "\xF0\x9D\x93\x93"    => "\x64",
1259          "\xF0\x9D\x93\x94"    => "\x65",
1260          "\xF0\x9D\x93\x95"    => "\x66",
1261          "\xF0\x9D\x93\x96"    => "\x67",
1262          "\xF0\x9D\x93\x97"    => "\x68",
1263          "\xF0\x9D\x93\x98"    => "\x69",
1264          "\xF0\x9D\x93\x99"    => "\x6A",
1265          "\xF0\x9D\x93\x9A"    => "\x6B",
1266          "\xF0\x9D\x93\x9B"    => "\x6C",
1267          "\xF0\x9D\x93\x9C"    => "\x6D",
1268          "\xF0\x9D\x93\x9D"    => "\x6E",
1269          "\xF0\x9D\x93\x9E"    => "\x6F",
1270          "\xF0\x9D\x93\x9F"    => "\x70",
1271          "\xF0\x9D\x93\xA0"    => "\x71",
1272          "\xF0\x9D\x93\xA1"    => "\x72",
1273          "\xF0\x9D\x93\xA2"    => "\x73",
1274          "\xF0\x9D\x93\xA3"    => "\x74",
1275          "\xF0\x9D\x93\xA4"    => "\x75",
1276          "\xF0\x9D\x93\xA5"    => "\x76",
1277          "\xF0\x9D\x93\xA6"    => "\x77",
1278          "\xF0\x9D\x93\xA7"    => "\x78",
1279          "\xF0\x9D\x93\xA8"    => "\x79",
1280          "\xF0\x9D\x93\xA9"    => "\x7A",
1281          "\xF0\x9D\x94\x84"    => "\x61",
1282          "\xF0\x9D\x94\x85"    => "\x62",
1283          "\xF0\x9D\x94\x87"    => "\x64",
1284          "\xF0\x9D\x94\x88"    => "\x65",
1285          "\xF0\x9D\x94\x89"    => "\x66",
1286          "\xF0\x9D\x94\x8A"    => "\x67",
1287          "\xF0\x9D\x94\x8D"    => "\x6A",
1288          "\xF0\x9D\x94\x8E"    => "\x6B",
1289          "\xF0\x9D\x94\x8F"    => "\x6C",
1290          "\xF0\x9D\x94\x90"    => "\x6D",
1291          "\xF0\x9D\x94\x91"    => "\x6E",
1292          "\xF0\x9D\x94\x92"    => "\x6F",
1293          "\xF0\x9D\x94\x93"    => "\x70",
1294          "\xF0\x9D\x94\x94"    => "\x71",
1295          "\xF0\x9D\x94\x96"    => "\x73",
1296          "\xF0\x9D\x94\x97"    => "\x74",
1297          "\xF0\x9D\x94\x98"    => "\x75",
1298          "\xF0\x9D\x94\x99"    => "\x76",
1299          "\xF0\x9D\x94\x9A"    => "\x77",
1300          "\xF0\x9D\x94\x9B"    => "\x78",
1301          "\xF0\x9D\x94\x9C"    => "\x79",
1302          "\xF0\x9D\x94\xB8"    => "\x61",
1303          "\xF0\x9D\x94\xB9"    => "\x62",
1304          "\xF0\x9D\x94\xBB"    => "\x64",
1305          "\xF0\x9D\x94\xBC"    => "\x65",
1306          "\xF0\x9D\x94\xBD"    => "\x66",
1307          "\xF0\x9D\x94\xBE"    => "\x67",
1308          "\xF0\x9D\x95\x80"    => "\x69",
1309          "\xF0\x9D\x95\x81"    => "\x6A",
1310          "\xF0\x9D\x95\x82"    => "\x6B",
1311          "\xF0\x9D\x95\x83"    => "\x6C",
1312          "\xF0\x9D\x95\x84"    => "\x6D",
1313          "\xF0\x9D\x95\x86"    => "\x6F",
1314          "\xF0\x9D\x95\x8A"    => "\x73",
1315          "\xF0\x9D\x95\x8B"    => "\x74",
1316          "\xF0\x9D\x95\x8C"    => "\x75",
1317          "\xF0\x9D\x95\x8D"    => "\x76",
1318          "\xF0\x9D\x95\x8E"    => "\x77",
1319          "\xF0\x9D\x95\x8F"    => "\x78",
1320          "\xF0\x9D\x95\x90"    => "\x79",
1321          "\xF0\x9D\x95\xAC"    => "\x61",
1322          "\xF0\x9D\x95\xAD"    => "\x62",
1323          "\xF0\x9D\x95\xAE"    => "\x63",
1324          "\xF0\x9D\x95\xAF"    => "\x64",
1325          "\xF0\x9D\x95\xB0"    => "\x65",
1326          "\xF0\x9D\x95\xB1"    => "\x66",
1327          "\xF0\x9D\x95\xB2"    => "\x67",
1328          "\xF0\x9D\x95\xB3"    => "\x68",
1329          "\xF0\x9D\x95\xB4"    => "\x69",
1330          "\xF0\x9D\x95\xB5"    => "\x6A",
1331          "\xF0\x9D\x95\xB6"    => "\x6B",
1332          "\xF0\x9D\x95\xB7"    => "\x6C",
1333          "\xF0\x9D\x95\xB8"    => "\x6D",
1334          "\xF0\x9D\x95\xB9"    => "\x6E",
1335          "\xF0\x9D\x95\xBA"    => "\x6F",
1336          "\xF0\x9D\x95\xBB"    => "\x70",
1337          "\xF0\x9D\x95\xBC"    => "\x71",
1338          "\xF0\x9D\x95\xBD"    => "\x72",
1339          "\xF0\x9D\x95\xBE"    => "\x73",
1340          "\xF0\x9D\x95\xBF"    => "\x74",
1341          "\xF0\x9D\x96\x80"    => "\x75",
1342          "\xF0\x9D\x96\x81"    => "\x76",
1343          "\xF0\x9D\x96\x82"    => "\x77",
1344          "\xF0\x9D\x96\x83"    => "\x78",
1345          "\xF0\x9D\x96\x84"    => "\x79",
1346          "\xF0\x9D\x96\x85"    => "\x7A",
1347          "\xF0\x9D\x96\xA0"    => "\x61",
1348          "\xF0\x9D\x96\xA1"    => "\x62",
1349          "\xF0\x9D\x96\xA2"    => "\x63",
1350          "\xF0\x9D\x96\xA3"    => "\x64",
1351          "\xF0\x9D\x96\xA4"    => "\x65",
1352          "\xF0\x9D\x96\xA5"    => "\x66",
1353          "\xF0\x9D\x96\xA6"    => "\x67",
1354          "\xF0\x9D\x96\xA7"    => "\x68",
1355          "\xF0\x9D\x96\xA8"    => "\x69",
1356          "\xF0\x9D\x96\xA9"    => "\x6A",
1357          "\xF0\x9D\x96\xAA"    => "\x6B",
1358          "\xF0\x9D\x96\xAB"    => "\x6C",
1359          "\xF0\x9D\x96\xAC"    => "\x6D",
1360          "\xF0\x9D\x96\xAD"    => "\x6E",
1361          "\xF0\x9D\x96\xAE"    => "\x6F",
1362          "\xF0\x9D\x96\xAF"    => "\x70",
1363          "\xF0\x9D\x96\xB0"    => "\x71",
1364          "\xF0\x9D\x96\xB1"    => "\x72",
1365          "\xF0\x9D\x96\xB2"    => "\x73",
1366          "\xF0\x9D\x96\xB3"    => "\x74",
1367          "\xF0\x9D\x96\xB4"    => "\x75",
1368          "\xF0\x9D\x96\xB5"    => "\x76",
1369          "\xF0\x9D\x96\xB6"    => "\x77",
1370          "\xF0\x9D\x96\xB7"    => "\x78",
1371          "\xF0\x9D\x96\xB8"    => "\x79",
1372          "\xF0\x9D\x96\xB9"    => "\x7A",
1373          "\xF0\x9D\x97\x94"    => "\x61",
1374          "\xF0\x9D\x97\x95"    => "\x62",
1375          "\xF0\x9D\x97\x96"    => "\x63",
1376          "\xF0\x9D\x97\x97"    => "\x64",
1377          "\xF0\x9D\x97\x98"    => "\x65",
1378          "\xF0\x9D\x97\x99"    => "\x66",
1379          "\xF0\x9D\x97\x9A"    => "\x67",
1380          "\xF0\x9D\x97\x9B"    => "\x68",
1381          "\xF0\x9D\x97\x9C"    => "\x69",
1382          "\xF0\x9D\x97\x9D"    => "\x6A",
1383          "\xF0\x9D\x97\x9E"    => "\x6B",
1384          "\xF0\x9D\x97\x9F"    => "\x6C",
1385          "\xF0\x9D\x97\xA0"    => "\x6D",
1386          "\xF0\x9D\x97\xA1"    => "\x6E",
1387          "\xF0\x9D\x97\xA2"    => "\x6F",
1388          "\xF0\x9D\x97\xA3"    => "\x70",
1389          "\xF0\x9D\x97\xA4"    => "\x71",
1390          "\xF0\x9D\x97\xA5"    => "\x72",
1391          "\xF0\x9D\x97\xA6"    => "\x73",
1392          "\xF0\x9D\x97\xA7"    => "\x74",
1393          "\xF0\x9D\x97\xA8"    => "\x75",
1394          "\xF0\x9D\x97\xA9"    => "\x76",
1395          "\xF0\x9D\x97\xAA"    => "\x77",
1396          "\xF0\x9D\x97\xAB"    => "\x78",
1397          "\xF0\x9D\x97\xAC"    => "\x79",
1398          "\xF0\x9D\x97\xAD"    => "\x7A",
1399          "\xF0\x9D\x98\x88"    => "\x61",
1400          "\xF0\x9D\x98\x89"    => "\x62",
1401          "\xF0\x9D\x98\x8A"    => "\x63",
1402          "\xF0\x9D\x98\x8B"    => "\x64",
1403          "\xF0\x9D\x98\x8C"    => "\x65",
1404          "\xF0\x9D\x98\x8D"    => "\x66",
1405          "\xF0\x9D\x98\x8E"    => "\x67",
1406          "\xF0\x9D\x98\x8F"    => "\x68",
1407          "\xF0\x9D\x98\x90"    => "\x69",
1408          "\xF0\x9D\x98\x91"    => "\x6A",
1409          "\xF0\x9D\x98\x92"    => "\x6B",
1410          "\xF0\x9D\x98\x93"    => "\x6C",
1411          "\xF0\x9D\x98\x94"    => "\x6D",
1412          "\xF0\x9D\x98\x95"    => "\x6E",
1413          "\xF0\x9D\x98\x96"    => "\x6F",
1414          "\xF0\x9D\x98\x97"    => "\x70",
1415          "\xF0\x9D\x98\x98"    => "\x71",
1416          "\xF0\x9D\x98\x99"    => "\x72",
1417          "\xF0\x9D\x98\x9A"    => "\x73",
1418          "\xF0\x9D\x98\x9B"    => "\x74",
1419          "\xF0\x9D\x98\x9C"    => "\x75",
1420          "\xF0\x9D\x98\x9D"    => "\x76",
1421          "\xF0\x9D\x98\x9E"    => "\x77",
1422          "\xF0\x9D\x98\x9F"    => "\x78",
1423          "\xF0\x9D\x98\xA0"    => "\x79",
1424          "\xF0\x9D\x98\xA1"    => "\x7A",
1425          "\xF0\x9D\x98\xBC"    => "\x61",
1426          "\xF0\x9D\x98\xBD"    => "\x62",
1427          "\xF0\x9D\x98\xBE"    => "\x63",
1428          "\xF0\x9D\x98\xBF"    => "\x64",
1429          "\xF0\x9D\x99\x80"    => "\x65",
1430          "\xF0\x9D\x99\x81"    => "\x66",
1431          "\xF0\x9D\x99\x82"    => "\x67",
1432          "\xF0\x9D\x99\x83"    => "\x68",
1433          "\xF0\x9D\x99\x84"    => "\x69",
1434          "\xF0\x9D\x99\x85"    => "\x6A",
1435          "\xF0\x9D\x99\x86"    => "\x6B",
1436          "\xF0\x9D\x99\x87"    => "\x6C",
1437          "\xF0\x9D\x99\x88"    => "\x6D",
1438          "\xF0\x9D\x99\x89"    => "\x6E",
1439          "\xF0\x9D\x99\x8A"    => "\x6F",
1440          "\xF0\x9D\x99\x8B"    => "\x70",
1441          "\xF0\x9D\x99\x8C"    => "\x71",
1442          "\xF0\x9D\x99\x8D"    => "\x72",
1443          "\xF0\x9D\x99\x8E"    => "\x73",
1444          "\xF0\x9D\x99\x8F"    => "\x74",
1445          "\xF0\x9D\x99\x90"    => "\x75",
1446          "\xF0\x9D\x99\x91"    => "\x76",
1447          "\xF0\x9D\x99\x92"    => "\x77",
1448          "\xF0\x9D\x99\x93"    => "\x78",
1449          "\xF0\x9D\x99\x94"    => "\x79",
1450          "\xF0\x9D\x99\x95"    => "\x7A",
1451          "\xF0\x9D\x99\xB0"    => "\x61",
1452          "\xF0\x9D\x99\xB1"    => "\x62",
1453          "\xF0\x9D\x99\xB2"    => "\x63",
1454          "\xF0\x9D\x99\xB3"    => "\x64",
1455          "\xF0\x9D\x99\xB4"    => "\x65",
1456          "\xF0\x9D\x99\xB5"    => "\x66",
1457          "\xF0\x9D\x99\xB6"    => "\x67",
1458          "\xF0\x9D\x99\xB7"    => "\x68",
1459          "\xF0\x9D\x99\xB8"    => "\x69",
1460          "\xF0\x9D\x99\xB9"    => "\x6A",
1461          "\xF0\x9D\x99\xBA"    => "\x6B",
1462          "\xF0\x9D\x99\xBB"    => "\x6C",
1463          "\xF0\x9D\x99\xBC"    => "\x6D",
1464          "\xF0\x9D\x99\xBD"    => "\x6E",
1465          "\xF0\x9D\x99\xBE"    => "\x6F",
1466          "\xF0\x9D\x99\xBF"    => "\x70",
1467          "\xF0\x9D\x9A\x80"    => "\x71",
1468          "\xF0\x9D\x9A\x81"    => "\x72",
1469          "\xF0\x9D\x9A\x82"    => "\x73",
1470          "\xF0\x9D\x9A\x83"    => "\x74",
1471          "\xF0\x9D\x9A\x84"    => "\x75",
1472          "\xF0\x9D\x9A\x85"    => "\x76",
1473          "\xF0\x9D\x9A\x86"    => "\x77",
1474          "\xF0\x9D\x9A\x87"    => "\x78",
1475          "\xF0\x9D\x9A\x88"    => "\x79",
1476          "\xF0\x9D\x9A\x89"    => "\x7A",
1477          "\xF0\x9D\x9A\xA8"    => "\xCE\xB1",
1478          "\xF0\x9D\x9A\xA9"    => "\xCE\xB2",
1479          "\xF0\x9D\x9A\xAA"    => "\xCE\xB3",
1480          "\xF0\x9D\x9A\xAB"    => "\xCE\xB4",
1481          "\xF0\x9D\x9A\xAC"    => "\xCE\xB5",
1482          "\xF0\x9D\x9A\xAD"    => "\xCE\xB6",
1483          "\xF0\x9D\x9A\xAE"    => "\xCE\xB7",
1484          "\xF0\x9D\x9A\xAF"    => "\xCE\xB8",
1485          "\xF0\x9D\x9A\xB0"    => "\xCE\xB9",
1486          "\xF0\x9D\x9A\xB1"    => "\xCE\xBA",
1487          "\xF0\x9D\x9A\xB2"    => "\xCE\xBB",
1488          "\xF0\x9D\x9A\xB3"    => "\xCE\xBC",
1489          "\xF0\x9D\x9A\xB4"    => "\xCE\xBD",
1490          "\xF0\x9D\x9A\xB5"    => "\xCE\xBE",
1491          "\xF0\x9D\x9A\xB6"    => "\xCE\xBF",
1492          "\xF0\x9D\x9A\xB7"    => "\xCF\x80",
1493          "\xF0\x9D\x9A\xB8"    => "\xCF\x81",
1494          "\xF0\x9D\x9A\xB9"    => "\xCE\xB8",
1495          "\xF0\x9D\x9A\xBA"    => "\xCF\x83",
1496          "\xF0\x9D\x9A\xBB"    => "\xCF\x84",
1497          "\xF0\x9D\x9A\xBC"    => "\xCF\x85",
1498          "\xF0\x9D\x9A\xBD"    => "\xCF\x86",
1499          "\xF0\x9D\x9A\xBE"    => "\xCF\x87",
1500          "\xF0\x9D\x9A\xBF"    => "\xCF\x88",
1501          "\xF0\x9D\x9B\x80"    => "\xCF\x89",
1502          "\xF0\x9D\x9B\x93"    => "\xCF\x83",
1503          "\xF0\x9D\x9B\xA2"    => "\xCE\xB1",
1504          "\xF0\x9D\x9B\xA3"    => "\xCE\xB2",
1505          "\xF0\x9D\x9B\xA4"    => "\xCE\xB3",
1506          "\xF0\x9D\x9B\xA5"    => "\xCE\xB4",
1507          "\xF0\x9D\x9B\xA6"    => "\xCE\xB5",
1508          "\xF0\x9D\x9B\xA7"    => "\xCE\xB6",
1509          "\xF0\x9D\x9B\xA8"    => "\xCE\xB7",
1510          "\xF0\x9D\x9B\xA9"    => "\xCE\xB8",
1511          "\xF0\x9D\x9B\xAA"    => "\xCE\xB9",
1512          "\xF0\x9D\x9B\xAB"    => "\xCE\xBA",
1513          "\xF0\x9D\x9B\xAC"    => "\xCE\xBB",
1514          "\xF0\x9D\x9B\xAD"    => "\xCE\xBC",
1515          "\xF0\x9D\x9B\xAE"    => "\xCE\xBD",
1516          "\xF0\x9D\x9B\xAF"    => "\xCE\xBE",
1517          "\xF0\x9D\x9B\xB0"    => "\xCE\xBF",
1518          "\xF0\x9D\x9B\xB1"    => "\xCF\x80",
1519          "\xF0\x9D\x9B\xB2"    => "\xCF\x81",
1520          "\xF0\x9D\x9B\xB3"    => "\xCE\xB8",
1521          "\xF0\x9D\x9B\xB4"    => "\xCF\x83",
1522          "\xF0\x9D\x9B\xB5"    => "\xCF\x84",
1523          "\xF0\x9D\x9B\xB6"    => "\xCF\x85",
1524          "\xF0\x9D\x9B\xB7"    => "\xCF\x86",
1525          "\xF0\x9D\x9B\xB8"    => "\xCF\x87",
1526          "\xF0\x9D\x9B\xB9"    => "\xCF\x88",
1527          "\xF0\x9D\x9B\xBA"    => "\xCF\x89",
1528          "\xF0\x9D\x9C\x8D"    => "\xCF\x83",
1529          "\xF0\x9D\x9C\x9C"    => "\xCE\xB1",
1530          "\xF0\x9D\x9C\x9D"    => "\xCE\xB2",
1531          "\xF0\x9D\x9C\x9E"    => "\xCE\xB3",
1532          "\xF0\x9D\x9C\x9F"    => "\xCE\xB4",
1533          "\xF0\x9D\x9C\xA0"    => "\xCE\xB5",
1534          "\xF0\x9D\x9C\xA1"    => "\xCE\xB6",
1535          "\xF0\x9D\x9C\xA2"    => "\xCE\xB7",
1536          "\xF0\x9D\x9C\xA3"    => "\xCE\xB8",
1537          "\xF0\x9D\x9C\xA4"    => "\xCE\xB9",
1538          "\xF0\x9D\x9C\xA5"    => "\xCE\xBA",
1539          "\xF0\x9D\x9C\xA6"    => "\xCE\xBB",
1540          "\xF0\x9D\x9C\xA7"    => "\xCE\xBC",
1541          "\xF0\x9D\x9C\xA8"    => "\xCE\xBD",
1542          "\xF0\x9D\x9C\xA9"    => "\xCE\xBE",
1543          "\xF0\x9D\x9C\xAA"    => "\xCE\xBF",
1544          "\xF0\x9D\x9C\xAB"    => "\xCF\x80",
1545          "\xF0\x9D\x9C\xAC"    => "\xCF\x81",
1546          "\xF0\x9D\x9C\xAD"    => "\xCE\xB8",
1547          "\xF0\x9D\x9C\xAE"    => "\xCF\x83",
1548          "\xF0\x9D\x9C\xAF"    => "\xCF\x84",
1549          "\xF0\x9D\x9C\xB0"    => "\xCF\x85",
1550          "\xF0\x9D\x9C\xB1"    => "\xCF\x86",
1551          "\xF0\x9D\x9C\xB2"    => "\xCF\x87",
1552          "\xF0\x9D\x9C\xB3"    => "\xCF\x88",
1553          "\xF0\x9D\x9C\xB4"    => "\xCF\x89",
1554          "\xF0\x9D\x9D\x87"    => "\xCF\x83",
1555          "\xF0\x9D\x9D\x96"    => "\xCE\xB1",
1556          "\xF0\x9D\x9D\x97"    => "\xCE\xB2",
1557          "\xF0\x9D\x9D\x98"    => "\xCE\xB3",
1558          "\xF0\x9D\x9D\x99"    => "\xCE\xB4",
1559          "\xF0\x9D\x9D\x9A"    => "\xCE\xB5",
1560          "\xF0\x9D\x9D\x9B"    => "\xCE\xB6",
1561          "\xF0\x9D\x9D\x9C"    => "\xCE\xB7",
1562          "\xF0\x9D\x9D\x9D"    => "\xCE\xB8",
1563          "\xF0\x9D\x9D\x9E"    => "\xCE\xB9",
1564          "\xF0\x9D\x9D\x9F"    => "\xCE\xBA",
1565          "\xF0\x9D\x9D\xA0"    => "\xCE\xBB",
1566          "\xF0\x9D\x9D\xA1"    => "\xCE\xBC",
1567          "\xF0\x9D\x9D\xA2"    => "\xCE\xBD",
1568          "\xF0\x9D\x9D\xA3"    => "\xCE\xBE",
1569          "\xF0\x9D\x9D\xA4"    => "\xCE\xBF",
1570          "\xF0\x9D\x9D\xA5"    => "\xCF\x80",
1571          "\xF0\x9D\x9D\xA6"    => "\xCF\x81",
1572          "\xF0\x9D\x9D\xA7"    => "\xCE\xB8",
1573          "\xF0\x9D\x9D\xA8"    => "\xCF\x83",
1574          "\xF0\x9D\x9D\xA9"    => "\xCF\x84",
1575          "\xF0\x9D\x9D\xAA"    => "\xCF\x85",
1576          "\xF0\x9D\x9D\xAB"    => "\xCF\x86",
1577          "\xF0\x9D\x9D\xAC"    => "\xCF\x87",
1578          "\xF0\x9D\x9D\xAD"    => "\xCF\x88",
1579          "\xF0\x9D\x9D\xAE"    => "\xCF\x89",
1580          "\xF0\x9D\x9E\x81"    => "\xCF\x83",
1581          "\xF0\x9D\x9E\x90"    => "\xCE\xB1",
1582          "\xF0\x9D\x9E\x91"    => "\xCE\xB2",
1583          "\xF0\x9D\x9E\x92"    => "\xCE\xB3",
1584          "\xF0\x9D\x9E\x93"    => "\xCE\xB4",
1585          "\xF0\x9D\x9E\x94"    => "\xCE\xB5",
1586          "\xF0\x9D\x9E\x95"    => "\xCE\xB6",
1587          "\xF0\x9D\x9E\x96"    => "\xCE\xB7",
1588          "\xF0\x9D\x9E\x97"    => "\xCE\xB8",
1589          "\xF0\x9D\x9E\x98"    => "\xCE\xB9",
1590          "\xF0\x9D\x9E\x99"    => "\xCE\xBA",
1591          "\xF0\x9D\x9E\x9A"    => "\xCE\xBB",
1592          "\xF0\x9D\x9E\x9B"    => "\xCE\xBC",
1593          "\xF0\x9D\x9E\x9C"    => "\xCE\xBD",
1594          "\xF0\x9D\x9E\x9D"    => "\xCE\xBE",
1595          "\xF0\x9D\x9E\x9E"    => "\xCE\xBF",
1596          "\xF0\x9D\x9E\x9F"    => "\xCF\x80",
1597          "\xF0\x9D\x9E\xA0"    => "\xCF\x81",
1598          "\xF0\x9D\x9E\xA1"    => "\xCE\xB8",
1599          "\xF0\x9D\x9E\xA2"    => "\xCF\x83",
1600          "\xF0\x9D\x9E\xA3"    => "\xCF\x84",
1601          "\xF0\x9D\x9E\xA4"    => "\xCF\x85",
1602          "\xF0\x9D\x9E\xA5"    => "\xCF\x86",
1603          "\xF0\x9D\x9E\xA6"    => "\xCF\x87",
1604          "\xF0\x9D\x9E\xA7"    => "\xCF\x88",
1605          "\xF0\x9D\x9E\xA8"    => "\xCF\x89",
1606          "\xF0\x9D\x9E\xBB"    => "\xCF\x83",
1607          "\xF0\x9D\x9F\x8A"    => "\xCF\x9D",
1608      );
1609      global $phpbb_root_path, $phpEx;
1610  
1611      // do the case fold
1612      $text = utf8_case_fold($text, $option);
1613  
1614      if (!class_exists('utf_normalizer'))
1615      {
1616          global $phpbb_root_path, $phpEx;
1617          include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
1618      }
1619  
1620      // convert to NFKC
1621      utf_normalizer::nfkc($text);
1622  
1623      // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
1624      $text = strtr($text, $fc_nfkc_closure);
1625  
1626      return $text;
1627  }
1628  
/**
* Case fold text that is assumed to already be in NFC.
*
* Before folding, every precomposed code point listed in the table below is
* replaced by the sequence the table maps it to (each replacement ends in
* U+0345, COMBINING GREEK YPOGEGRAMMENI). The result is then handed to
* utf8_case_fold().
*
* @param    string    $text    text to be case folded
* @param    string    $option    determines how we will fold the cases, passed through to utf8_case_fold()
* @return    string            case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
    // precomposed code point => replacement sequence ending in U+0345
    static $ypogegrammeni = array(
        "\xCD\xBA"        => "\x20\xCD\x85",
        "\xE1\xBE\x80"    => "\xE1\xBC\x80\xCD\x85",
        "\xE1\xBE\x81"    => "\xE1\xBC\x81\xCD\x85",
        "\xE1\xBE\x82"    => "\xE1\xBC\x82\xCD\x85",
        "\xE1\xBE\x83"    => "\xE1\xBC\x83\xCD\x85",
        "\xE1\xBE\x84"    => "\xE1\xBC\x84\xCD\x85",
        "\xE1\xBE\x85"    => "\xE1\xBC\x85\xCD\x85",
        "\xE1\xBE\x86"    => "\xE1\xBC\x86\xCD\x85",
        "\xE1\xBE\x87"    => "\xE1\xBC\x87\xCD\x85",
        "\xE1\xBE\x88"    => "\xE1\xBC\x88\xCD\x85",
        "\xE1\xBE\x89"    => "\xE1\xBC\x89\xCD\x85",
        "\xE1\xBE\x8A"    => "\xE1\xBC\x8A\xCD\x85",
        "\xE1\xBE\x8B"    => "\xE1\xBC\x8B\xCD\x85",
        "\xE1\xBE\x8C"    => "\xE1\xBC\x8C\xCD\x85",
        "\xE1\xBE\x8D"    => "\xE1\xBC\x8D\xCD\x85",
        "\xE1\xBE\x8E"    => "\xE1\xBC\x8E\xCD\x85",
        "\xE1\xBE\x8F"    => "\xE1\xBC\x8F\xCD\x85",
        "\xE1\xBE\x90"    => "\xE1\xBC\xA0\xCD\x85",
        "\xE1\xBE\x91"    => "\xE1\xBC\xA1\xCD\x85",
        "\xE1\xBE\x92"    => "\xE1\xBC\xA2\xCD\x85",
        "\xE1\xBE\x93"    => "\xE1\xBC\xA3\xCD\x85",
        "\xE1\xBE\x94"    => "\xE1\xBC\xA4\xCD\x85",
        "\xE1\xBE\x95"    => "\xE1\xBC\xA5\xCD\x85",
        "\xE1\xBE\x96"    => "\xE1\xBC\xA6\xCD\x85",
        "\xE1\xBE\x97"    => "\xE1\xBC\xA7\xCD\x85",
        "\xE1\xBE\x98"    => "\xE1\xBC\xA8\xCD\x85",
        "\xE1\xBE\x99"    => "\xE1\xBC\xA9\xCD\x85",
        "\xE1\xBE\x9A"    => "\xE1\xBC\xAA\xCD\x85",
        "\xE1\xBE\x9B"    => "\xE1\xBC\xAB\xCD\x85",
        "\xE1\xBE\x9C"    => "\xE1\xBC\xAC\xCD\x85",
        "\xE1\xBE\x9D"    => "\xE1\xBC\xAD\xCD\x85",
        "\xE1\xBE\x9E"    => "\xE1\xBC\xAE\xCD\x85",
        "\xE1\xBE\x9F"    => "\xE1\xBC\xAF\xCD\x85",
        "\xE1\xBE\xA0"    => "\xE1\xBD\xA0\xCD\x85",
        "\xE1\xBE\xA1"    => "\xE1\xBD\xA1\xCD\x85",
        "\xE1\xBE\xA2"    => "\xE1\xBD\xA2\xCD\x85",
        "\xE1\xBE\xA3"    => "\xE1\xBD\xA3\xCD\x85",
        "\xE1\xBE\xA4"    => "\xE1\xBD\xA4\xCD\x85",
        "\xE1\xBE\xA5"    => "\xE1\xBD\xA5\xCD\x85",
        "\xE1\xBE\xA6"    => "\xE1\xBD\xA6\xCD\x85",
        "\xE1\xBE\xA7"    => "\xE1\xBD\xA7\xCD\x85",
        "\xE1\xBE\xA8"    => "\xE1\xBD\xA8\xCD\x85",
        "\xE1\xBE\xA9"    => "\xE1\xBD\xA9\xCD\x85",
        "\xE1\xBE\xAA"    => "\xE1\xBD\xAA\xCD\x85",
        "\xE1\xBE\xAB"    => "\xE1\xBD\xAB\xCD\x85",
        "\xE1\xBE\xAC"    => "\xE1\xBD\xAC\xCD\x85",
        "\xE1\xBE\xAD"    => "\xE1\xBD\xAD\xCD\x85",
        "\xE1\xBE\xAE"    => "\xE1\xBD\xAE\xCD\x85",
        "\xE1\xBE\xAF"    => "\xE1\xBD\xAF\xCD\x85",
        "\xE1\xBE\xB2"    => "\xE1\xBD\xB0\xCD\x85",
        "\xE1\xBE\xB3"    => "\xCE\xB1\xCD\x85",
        "\xE1\xBE\xB4"    => "\xCE\xAC\xCD\x85",
        "\xE1\xBE\xB7"    => "\xE1\xBE\xB6\xCD\x85",
        "\xE1\xBE\xBC"    => "\xCE\x91\xCD\x85",
        "\xE1\xBF\x82"    => "\xE1\xBD\xB4\xCD\x85",
        "\xE1\xBF\x83"    => "\xCE\xB7\xCD\x85",
        "\xE1\xBF\x84"    => "\xCE\xAE\xCD\x85",
        "\xE1\xBF\x87"    => "\xE1\xBF\x86\xCD\x85",
        "\xE1\xBF\x8C"    => "\xCE\x97\xCD\x85",
        "\xE1\xBF\xB2"    => "\xE1\xBD\xBC\xCD\x85",
        "\xE1\xBF\xB3"    => "\xCF\x89\xCD\x85",
        "\xE1\xBF\xB4"    => "\xCF\x8E\xCD\x85",
        "\xE1\xBF\xB7"    => "\xE1\xBF\xB6\xCD\x85",
        "\xE1\xBF\xBC"    => "\xCE\xA9\xCD\x85",
    );

    // perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
    $text = strtr($text, $ypogegrammeni);

    // do the case fold
    return utf8_case_fold($text, $option);
}
1716  
1717  if (extension_loaded('intl'))
1718  {
1719      /**
1720      * wrapper around PHP's native normalizer from intl
1721      * previously a PECL extension, included in the core since PHP 5.3.0
1722      * http://php.net/manual/en/normalizer.normalize.php
1723      *
1724      * @param    mixed    $strings    a string or an array of strings to normalize
1725      * @return    mixed                the normalized content, preserving array keys if array given.
1726      */
1727  	function utf8_normalize_nfc($strings)
1728      {
1729          if (empty($strings))
1730          {
1731              return $strings;
1732          }
1733  
1734          if (!is_array($strings))
1735          {
1736              if (Normalizer::isNormalized($strings))
1737              {
1738                  return $strings;
1739              }
1740              return (string) Normalizer::normalize($strings);
1741          }
1742          else
1743          {
1744              foreach ($strings as $key => $string)
1745              {
1746                  if (is_array($string))
1747                  {
1748                      foreach ($string as $_key => $_string)
1749                      {
1750                          if (Normalizer::isNormalized($strings[$key][$_key]))
1751                          {
1752                              continue;
1753                          }
1754                          $strings[$key][$_key] = (string) Normalizer::normalize($strings[$key][$_key]);
1755                      }
1756                  }
1757                  else
1758                  {
1759                      if (Normalizer::isNormalized($strings[$key]))
1760                      {
1761                          continue;
1762                      }
1763                      $strings[$key] = (string) Normalizer::normalize($strings[$key]);
1764                  }
1765              }
1766          }
1767  
1768          return $strings;
1769      }
1770  }
1771  else
1772  {
1773      /**
1774      * A wrapper function for the normalizer which takes care of including the class if
1775      * required and modifies the passed strings to be in NFC (Normalization Form Composition).
1776      *
1777      * @param    mixed    $strings    a string or an array of strings to normalize
1778      * @return    mixed                the normalized content, preserving array keys if array given.
1779      */
1780  	function utf8_normalize_nfc($strings)
1781      {
1782          if (empty($strings))
1783          {
1784              return $strings;
1785          }
1786  
1787          if (!class_exists('utf_normalizer'))
1788          {
1789              global $phpbb_root_path, $phpEx;
1790              include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
1791          }
1792  
1793          if (!is_array($strings))
1794          {
1795              utf_normalizer::nfc($strings);
1796          }
1797          else if (is_array($strings))
1798          {
1799              foreach ($strings as $key => $string)
1800              {
1801                  if (is_array($string))
1802                  {
1803                      foreach ($string as $_key => $_string)
1804                      {
1805                          utf_normalizer::nfc($strings[$key][$_key]);
1806                      }
1807                  }
1808                  else
1809                  {
1810                      utf_normalizer::nfc($strings[$key]);
1811                  }
1812              }
1813          }
1814  
1815          return $strings;
1816      }
1817  }
1818  
/**
* Generate the "clean" version of a string.
*
* The text is run through utf8_case_fold_nfkc(), then every sequence listed in
* the confusables table (includes/utf/data/confusables) is replaced by its
* mapped value, control characters are removed, runs of spaces are collapsed
* to a single space and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in the users table. And all other
* columns that store a clean string otherwise you will break this functionality.
*
* @param    string    $text    An unclean string, maybe user input (has to be valid UTF-8!)
* @return    string            Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
    global $phpbb_root_path, $phpEx;

    static $homographs = array();

    // The confusables table is loaded on first use and kept for later calls
    if (empty($homographs))
    {
        $homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
    }

    $cleaned = strtr(utf8_case_fold_nfkc($text), $homographs);

    // Applied in order: drop the remaining control characters,
    // then reduce multiple spaces to a single one
    $cleaned = preg_replace(
        array('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '# {2,}#'),
        array('', ' '),
        $cleaned
    );

    // we can use trim here as all the other space characters should have been turned
    // into normal ASCII spaces by now
    return trim($cleaned);
}
1854  
/**
* Escape a value for HTML output with fixed settings:
* htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* @param    string    $value    the text to escape
* @return    string            the escaped text
*/
function utf8_htmlspecialchars($value)
{
    $flags = ENT_COMPAT;
    $charset = 'UTF-8';

    return htmlspecialchars($value, $flags, $charset);
}
1862  
/**
* Try to convert a system message to UTF-8 and escape it for HTML output
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param    string    $message    the message as returned by the system
* @return    string                the message, recoded if required and passed through utf8_htmlspecialchars()
*/
function utf8_convert_message($message)
{
    // Conversion is only needed when bytes outside the ASCII range are present,
    // there is no point in converting ASCII messages from ISO-8859-1 to UTF-8
    $needs_recoding = (bool) preg_match('/[\x80-\xFF]/', $message);

    if ($needs_recoding)
    {
        $message = utf8_recode($message, 'ISO-8859-1');
    }

    return utf8_htmlspecialchars($message);
}
1881  
/**
* UTF8-compatible wordwrap replacement
*
* Existing occurrences of $break in the input are kept as line boundaries.
* Within each of those lines the text is split on single spaces and the
* resulting words are packed into output lines of at most $width characters,
* measured with utf8_strlen().
*
* A word that is longer than $width and starts a line stays on that line; no
* empty line is emitted in front of it.
*
* @param    string    $string    The input string
* @param    int        $width    The column width. Defaults to 75.
* @param    string    $break    The line is broken using the optional break parameter. Defaults to '\n'.
* @param    bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart. Ignored when $width is smaller than 1, because such a word could never be cut down to fit.
*
* @return    string            the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
    // Cutting at a width below one character never shortens the remainder,
    // so it would never terminate. Fall back to plain wrapping in that case.
    $cut = ($cut && $width > 0);

    $wrapped = array();

    // We first need to explode on $break, not destroying existing (intended) breaks
    foreach (explode($break, $string) as $line)
    {
        // Split the line into words, chopping overlong words into chunks if requested
        $pieces = array();
        foreach (explode(' ', $line) as $word)
        {
            while ($cut && utf8_strlen($word) > $width)
            {
                $pieces[] = utf8_substr($word, 0, $width);
                $word = utf8_substr($word, $width);
            }

            $pieces[] = $word;
        }

        // $current always carries one trailing space per appended piece
        $current = '';
        foreach ($pieces as $piece)
        {
            // Only start a new output line if the current one holds something,
            // otherwise an overlong word would leave an empty line behind
            if ($current !== '' && utf8_strlen($current . $piece) > $width)
            {
                // The trailing separator is a single ASCII space, so a byte-wise cut is safe
                $wrapped[] = substr($current, 0, -1);
                $current = '';
            }

            $current .= $piece . ' ';
        }

        $wrapped[] = substr($current, 0, -1);
    }

    return implode($break, $wrapped);
}
1934  
/**
* UTF8-safe basename() function
*
* basename() has some limitations and is dependent on the locale setting
* according to the PHP manual. Therefore we provide our own locale independent
* basename function.
*
* Everything up to and including the last forward slash is removed first,
* then the same is done for the last backslash in what remains.
*
* @param string $filename The filename basename() should be applied to
* @return string The basenamed filename
*/
function utf8_basename($filename)
{
    // Both separators are always checked because they could be mixed
    // or "sneaked" in - never trust user input.
    foreach (array('/', '\\') as $separator)
    {
        if (strpos($filename, $separator) === false)
        {
            continue;
        }

        $name_start = utf8_strrpos($filename, $separator) + 1;
        $filename = utf8_substr($filename, $name_start);
    }

    return $filename;
}
1962  
/**
* UTF8-safe str_replace() function
*
* Search values are processed one after another, each on the result of the
* previous replacement. A scalar $replace is used for every search value; a
* $replace array is matched to the search values by position and padded with
* empty strings if it is shorter. Empty search values are skipped, so they
* leave the subject unchanged.
*
* @param string|array $search The value (or list of values) to search for
* @param string|array $replace The replacement string (or list of replacement strings)
* @param string $subject The target string
* @return string The resultant string
*/
function utf8_str_replace($search, $replace, $subject)
{
    if (!is_array($search))
    {
        $search = array($search);
        if (is_array($replace))
        {
            // Report the misuse once and continue with the text an array-to-string
            // cast would produce, without raising the engine's own diagnostic on top
            trigger_error('Array to string conversion', E_USER_NOTICE);
            $replace = 'Array';
        }
    }
    else
    {
        // Positional access below requires keys 0..n-1
        $search = array_values($search);
    }

    $length = sizeof($search);

    if ($length === 0)
    {
        return $subject;
    }

    if (!is_array($replace))
    {
        $replace = array_fill(0, $length, $replace);
    }
    else
    {
        $replace = array_pad(array_values($replace), $length, '');
    }

    for ($i = 0; $i < $length; $i++)
    {
        // An empty needle can never be consumed; searching for it could loop forever
        if ((string) $search[$i] === '')
        {
            continue;
        }

        $search_length = utf8_strlen($search[$i]);
        $replace_length = utf8_strlen($replace[$i]);

        $offset = 0;
        while (($start = utf8_strpos($subject, $search[$i], $offset)) !== false)
        {
            $subject = utf8_substr($subject, 0, $start) . $replace[$i] . utf8_substr($subject, $start + $search_length);

            // Continue behind the inserted text so replacements are never rescanned
            $offset = $start + $replace_length;
        }
    }

    return $subject;
}


Generated: Thu Jan 11 00:25:41 2018 Cross-referenced by PHPXref 0.7.1