parseShortnames($text); $this->parseCustomAliases($text); $this->parseUnicode($text); }

    /**
     * Create a self-closing emoji tag and set its "seq" and "tseq" attributes
     *
     * @param  integer $tagPos Position of the tag in the original text
     * @param  integer $tagLen Length of text consumed by the tag
     * @param  string  $hex    Full-qualified sequence of codepoints in hex
     * @return void
     */
    protected function addTag($tagPos, $tagLen, $hex)
    {
        $emojiTag = $this->parser->addSelfClosingTag($this->config['tagName'], $tagPos, $tagLen, 10);

        // "seq" is the short form: every "-200d" and "-fe0f" component is dropped
        $emojiTag->setAttribute('seq', str_replace(['-200d', '-fe0f'], '', $hex));

        // "tseq" is the Twemoji form: zeroes at the very start of the string are stripped,
        // and "-fe0f" (VS-16) is dropped only if the sequence has no "-200d" (ZWJ) component
        $twemoji   = ltrim($hex, '0');
        $hasJoiner = (strpos($twemoji, '-200d') !== false);
        if (!$hasJoiner) {
            $twemoji = str_replace('-fe0f', '', $twemoji);
        }
        $emojiTag->setAttribute('tseq', $twemoji);
    }

    /**
     * Convert a UTF-8 string into a dash-separated list of hex codepoints
     *
     * Each codepoint is rendered in lowercase hex, zero-padded to at least four
     * digits. The input is assumed to be well-formed UTF-8: continuation bytes
     * are read without any bounds or validity checks.
     *
     * @param  string $str UTF-8 emoji
     * @return string      Codepoint sequence, e.g. "0023-20e3"
     */
    protected function getHexSequence($str)
    {
        $codepoints = [];
        $byteLen    = strlen($str);
        for ($pos = 0; $pos < $byteLen; ++$pos) {
            $value = ord($str[$pos]);

            // The subtracted constants cancel the UTF-8 marker bits of the lead byte
            // (0xF0, 0xE0 or 0xC0) and of each continuation byte (0x80)
            if ($value >= 0xF0) {
                // Four-byte sequence
                $second = ord($str[$pos + 1]);
                $third  = ord($str[$pos + 2]);
                $fourth = ord($str[$pos + 3]);
                $value  = ($value << 18) + ($second << 12) + ($third << 6) + $fourth - 0x3C82080;
                $pos   += 3;
            } elseif ($value >= 0xE0) {
                // Three-byte sequence
                $second = ord($str[$pos + 1]);
                $third  = ord($str[$pos + 2]);
                $value  = ($value << 12) + ($second << 6) + $third - 0xE2080;
                $pos   += 2;
            } elseif ($value >= 0xC0) {
                // Two-byte sequence
                $second = ord($str[$pos + 1]);
                $value  = ($value << 6) + $second - 0x3080;
                $pos   += 1;
            }

            $codepoints[] = sprintf('%04x', $value);
        }

        return implode('-', $codepoints);
    }

    /**
     * Find custom aliases in given text and add a tag for each known one
     *
     * Does nothing if no custom regexp is configured. If a quick-match string is
     * configured, matching starts at its first occurrence, and nothing is done
     * if it does not occur at all.
     *
     * @param  string $text Original text
     * @return void
     */
    protected function parseCustomAliases($text)
    {
        if (empty($this->config['customRegexp'])) {
            return;
        }

        $startPos = 0;
        if (isset($this->config['customQuickMatch'])) {
            $startPos = strpos($text, $this->config['customQuickMatch']);
            if ($startPos === false) {
                return;
            }
        }

        preg_match_all($this->config['customRegexp'], $text, $found, PREG_OFFSET_CAPTURE, $startPos);
        foreach ($found[0] as $match) {
            list($alias, $tagPos) = $match;

            // Matches that are not registered aliases are ignored
            if (!isset($this->parser->registeredVars['Emoji.aliases'][$alias])) {
                continue;
            }

            $hex = $this->getHexSequence($this->parser->registeredVars['Emoji.aliases'][$alias]);
            $this->addTag($tagPos, strlen($alias), $hex);
        }
    }

    /**
     * Find shortnames in given text and add a tag for each usable one
     *
     * A colon is appended to every match before it is looked up; presumably the
     * shortname regexp does not consume the closing colon (confirm against
     * $shortnameRegexp). A match that is not a registered alias is still
     * accepted if it looks like a colon-wrapped hex codepoint sequence.
     *
     * @param  string $text Original text
     * @return void
     */
    protected function parseShortnames($text)
    {
        // Start matching at the first colon, or bail out if there is none
        $startPos = strpos($text, ':');
        if ($startPos === false) {
            return;
        }

        preg_match_all($this->shortnameRegexp, $text, $found, PREG_OFFSET_CAPTURE, $startPos);
        foreach ($found[0] as $match) {
            list($name, $tagPos) = $match;
            $alias  = $name . ':';
            $tagLen = strlen($alias);

            if (isset($this->parser->registeredVars['Emoji.aliases'][$alias])) {
                $hex = $this->getHexSequence($this->parser->registeredVars['Emoji.aliases'][$alias]);
                $this->addTag($tagPos, $tagLen, $hex);
                continue;
            }

            // Literal hex sequence: the text between the two colons is used as is
            if (preg_match('/^:[0-3][0-9a-f]{3,4}(?:-[0-9a-f]{4,5})*:$/', $alias)) {
                $this->addTag($tagPos, $tagLen, substr($alias, 1, -1));
            }
        }
    }

    /**
     * Parse Unicode emoji in given text
     *
     * @param  string $text Original text
     * @return
void */
    protected function parseUnicode($text)
    {
        // Cheap pre-check: skip the regexp entirely unless the text contains at least
        // one of the bytes 0xE2, 0xEF or 0xF0 (presumably every sequence matched by
        // $unicodeRegexp contains one of them; confirm against the regexp)
        $hasCandidate = false;
        foreach (["\xE2", "\xEF", "\xF0"] as $marker) {
            if (strpos($text, $marker) !== false) {
                $hasCandidate = true;
                break;
            }
        }
        if (!$hasCandidate) {
            return;
        }

        preg_match_all($this->unicodeRegexp, $text, $found, PREG_OFFSET_CAPTURE);
        foreach ($found[0] as $match) {
            list($emoji, $tagPos) = $match;

            // The tag consumes the emoji's byte length; its codepoints become the hex sequence
            $this->addTag($tagPos, strlen($emoji), $this->getHexSequence($emoji));
        }
    }
}