[ Index ] |
PHP Cross Reference of phpBB-3.2.11-deutsch |
[Summary view] [Print] [Text view]
<?php

/*
 * Copyright (C) 2016 Nicolas Grekas - p@tchwork.com
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the (at your option):
 * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
 * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
 */

namespace Patchwork\PHP\Shim;

/**
 * Normalizer is a PHP fallback implementation of the Normalizer class provided by the intl extension.
 *
 * It has been validated with Unicode 6.3 Normalization Conformance Test.
 * See http://www.unicode.org/reports/tr15/ for detailed info about Unicode normalizations.
 *
 * The decomposition, composition and combining-class tables are loaded lazily
 * from serialized files under "unidata/" next to this file and cached in
 * static properties. Hangul syllables are handled arithmetically and do not
 * use those tables.
 *
 * @internal
 */
class Normalizer
{
    const NONE = 1;
    const FORM_D = 2;
    const FORM_KD = 3;
    const FORM_C = 4;
    const FORM_KC = 5;
    const NFD = 2;
    const NFKD = 3;
    const NFC = 4;
    const NFKC = 5;

    // Lazily loaded lookup tables (null until first use, see normalize()).
    private static $C;  // canonical composition: pair of chars => composed char
    private static $D;  // canonical decomposition: char => decomposed string
    private static $KD; // compatibility decomposition: char => decomposed string
    private static $cC; // combining class: char => class number

    // Length in bytes of a multi-byte UTF-8 sequence, keyed by its lead byte masked with "\xF0".
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    // The 128 ASCII bytes, used as the strspn() mask to skip over ASCII runs.
    private static $ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";

    /**
     * Tells whether a string is already in the given normalization form.
     *
     * @param string $s    The string to check; it is cast to string
     * @param int    $form One of the NFD, NFKD, NFC or NFKC constants
     *
     * @return bool True when $s is valid UTF-8 and equals its own normalization
     *              in $form; false otherwise, including for NONE or an unknown form
     */
    public static function isNormalized($s, $form = self::NFC)
    {
        if ($form <= self::NONE || self::NFKC < $form) {
            return false;
        }

        $s .= '';

        // Pure ASCII (or empty) input is normalized in every form.
        if (!isset($s[strspn($s, self::$ASCII)])) {
            return true;
        }

        // Quick check for NFC: valid UTF-8 made only of code points up to U+02FF.
        if (self::NFC === $form && preg_match('//u', $s) && !preg_match('/[^\x00-\x{2FF}]/u', $s)) {
            return true;
        }

        // No quick answer: compare against the actual normalization.
        // normalize() returns false for invalid UTF-8 or an unsupported form,
        // which can never be identical to the string $s.
        return self::normalize($s, $form) === $s;
    }

    /**
     * Normalizes a UTF-8 string to the requested form.
     *
     * @param string $s    The string to normalize; it is cast to string
     * @param int    $form One of the NONE, NFD, NFKD, NFC or NFKC constants
     *
     * @return string|false The normalized string ($s itself for NONE), or false
     *                      when $s is not valid UTF-8 or $form is unknown
     */
    public static function normalize($s, $form = self::NFC)
    {
        if (!preg_match('//u', $s .= '')) {
            return false;
        }

        // $C: recompose after decomposing; $K: use compatibility mappings.
        switch ($form) {
            case self::NONE: return $s;
            case self::NFC: $C = true; $K = false; break;
            case self::NFD: $C = false; $K = false; break;
            case self::NFKC: $C = true; $K = true; break;
            case self::NFKD: $C = false; $K = true; break;
            default: return false;
        }

        if ('' === $s) {
            return '';
        }

        if ($K && null === self::$KD) {
            self::$KD = self::getData('compatibilityDecomposition');
        }

        if (null === self::$D) {
            self::$D = self::getData('canonicalDecomposition');
            self::$cC = self::getData('combiningClass');
        }

        // When mbstring overloads the string functions, switch to 8bit so that
        // strlen()/substr() below work on bytes; the encoding is restored afterwards.
        if (null !== $mbEncoding = (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) ? mb_internal_encoding() : null) {
            mb_internal_encoding('8bit');
        }

        $r = self::decompose($s, $K);

        if ($C) {
            if (null === self::$C) {
                self::$C = self::getData('canonicalComposition');
            }

            $r = self::recompose($r);
        }

        if (null !== $mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return $r;
    }

    /**
     * Composes a decomposed, non-empty UTF-8 string.
     *
     * Walks the string one character at a time, keeping the last starter in
     * $lastUchr and combining chars that could not be composed in $tail.
     *
     * @param string $s Non-empty output of decompose()
     *
     * @return string The composed string
     */
    private static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;

        $result = $tail = '';

        $i = $s[0] < "\x80" ? 1 : $ulenMask[$s[0] & "\xF0"];
        $len = strlen($s);

        $lastUchr = substr($s, 0, $i);
        // A leading combining char gets a class above any table value here.
        $lastUcls = isset($combClass[$lastUchr]) ? 256 : 0;

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($tail) {
                    $lastUchr .= $tail;
                    $tail = '';
                }

                // Append the whole ASCII run except its last byte, which becomes
                // the new $lastUchr so a following combining char can attach to it.
                if ($j = strspn($s, $ASCII, $i + 1)) {
                    $lastUchr .= substr($s, $i, $j);
                    $i += $j;
                }

                $result .= $lastUchr;
                $lastUchr = $s[$i];
                $lastUcls = 0;
                ++$i;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);

            if ($lastUchr < "\xE1\x84\x80" || "\xE1\x84\x92" < $lastUchr
                || $uchr < "\xE1\x85\xA1" || "\xE1\x85\xB5" < $uchr
                || $lastUcls) {
                // Table lookup and combining chars composition

                $ucls = isset($combClass[$uchr]) ? $combClass[$uchr] : 0;

                if (isset($compMap[$lastUchr.$uchr]) && (!$lastUcls || $lastUcls < $ucls)) {
                    $lastUchr = $compMap[$lastUchr.$uchr];
                } elseif ($lastUcls = $ucls) {
                    // Intentional assignment: remember the class of this uncomposed combining char.
                    $tail .= $uchr;
                } else {
                    if ($tail) {
                        $lastUchr .= $tail;
                        $tail = '';
                    }

                    $result .= $lastUchr;
                    $lastUchr = $uchr;
                }
            } else {
                // Hangul chars: $lastUchr is a leading consonant (U+1100..U+1112)
                // and $uchr a vowel (U+1161..U+1175).

                $L = ord($lastUchr[2]) - 0x80;
                $V = ord($uchr[2]) - 0xA1;
                $T = 0;

                $uchr = substr($s, $i + $ulen, 3);

                // Trailing consonants are U+11A8..U+11C2 (index 1..27). U+11A7 is
                // only the index base: it must not be consumed, or it would be lost.
                if ("\xE1\x86\xA8" <= $uchr && $uchr <= "\xE1\x87\x82") {
                    $T = ord($uchr[2]) - 0xA7;
                    if (0 > $T) {
                        // Third byte wrapped around from the \x86 to the \x87 block.
                        $T += 0x40;
                    }
                    $ulen += 3;
                }

                $L = 0xAC00 + ($L * 21 + $V) * 28 + $T;
                $lastUchr = chr(0xE0 | $L >> 12).chr(0x80 | $L >> 6 & 0x3F).chr(0x80 | $L & 0x3F);
            }

            $i += $ulen;
        }

        return $result.$lastUchr.$tail;
    }

    /**
     * Decomposes a UTF-8 string and sorts runs of combining chars by class.
     *
     * @param string $s The valid UTF-8 string to decompose
     * @param bool   $c Whether to also apply compatibility decompositions
     *
     * @return string The decomposed string
     */
    private static function decompose($s, $c)
    {
        $result = '';

        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        $compatMap = $c ? self::$KD : array();

        // Pending combining chars, bucketed by combining class.
        $marks = array();
        $i = 0;
        $len = strlen($s);

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($marks) {
                    ksort($marks);
                    $result .= implode('', $marks);
                    $marks = array();
                }

                $j = 1 + strspn($s, $ASCII, $i + 1);
                $result .= substr($s, $i, $j);
                $i += $j;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);
            $i += $ulen;

            if ($uchr < "\xEA\xB0\x80" || "\xED\x9E\xA3" < $uchr) {
                // Table lookup

                if (isset($compatMap[$uchr])) {
                    $mapped = $compatMap[$uchr];
                } elseif (isset($decompMap[$uchr])) {
                    $mapped = $decompMap[$uchr];
                } else {
                    $mapped = $uchr;
                }

                if ($uchr !== $mapped) {
                    $uchr = $mapped;

                    $j = strlen($uchr);
                    $ulen = $uchr[0] < "\x80" ? 1 : $ulenMask[$uchr[0] & "\xF0"];

                    if ($ulen != $j) {
                        // Put trailing chars in $s, over the bytes already consumed,
                        // so that they go through the loop like regular input.

                        $j -= $ulen;
                        $i -= $j;

                        if (0 > $i) {
                            // Not enough consumed bytes to overwrite: grow $s on the left.
                            $s = str_repeat(' ', -$i).$s;
                            $len -= $i;
                            $i = 0;
                        }

                        while ($j--) {
                            $s[$i + $j] = $uchr[$ulen + $j];
                        }

                        $uchr = substr($uchr, 0, $ulen);
                    }
                }

                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting

                    if (!isset($marks[$combClass[$uchr]])) {
                        $marks[$combClass[$uchr]] = '';
                    }
                    $marks[$combClass[$uchr]] .= $uchr;
                    continue;
                }
            } else {
                // Hangul chars: split the syllable (U+AC00..U+D7A3) arithmetically.

                $uchr = unpack('C*', $uchr);
                // Syllable index, i.e. code point minus 0xAC00 (0xAC80 also removes
                // the 0x80 marker bits of the last byte).
                $j = (($uchr[1] - 224) << 12) + (($uchr[2] - 128) << 6) + $uchr[3] - 0xAC80;

                $uchr = "\xE1\x84".chr(0x80 + (int) ($j / 588))
                       ."\xE1\x85".chr(0xA1 + (int) (($j % 588) / 28));

                if ($j %= 28) {
                    $uchr .= $j < 25
                        ? ("\xE1\x86".chr(0xA7 + $j))
                        : ("\xE1\x87".chr(0x67 + $j));
                }
            }

            if ($marks) {
                ksort($marks);
                $result .= implode('', $marks);
                $marks = array();
            }

            $result .= $uchr;
        }

        if ($marks) {
            ksort($marks);
            $result .= implode('', $marks);
        }

        return $result;
    }

    /**
     * Loads one serialized lookup table shipped in the "unidata" directory.
     *
     * @param string $file Base name of the table, without the ".ser" extension
     *
     * @return mixed The unserialized content, or false when the file does not exist
     */
    private static function getData($file)
    {
        // The path is built from __DIR__ and names hard-coded in this class only.
        if (file_exists($file = __DIR__.'/unidata/'.$file.'.ser')) {
            return unserialize(file_get_contents($file));
        }

        return false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Nov 11 20:33:01 2020 | Cross-referenced by PHPXref 0.7.1 |