text = $text;
        // NOTE(review): the start of this method (and of the file) is outside this view, so the
        // statements of this method are kept exactly as found and only re-indented.

        // Record which quote characters occur so the quote passes can bail out cheaply
        $this->hasSingleQuote = (strpos($text, "'") !== false);
        $this->hasDoubleQuote = (strpos($text, '"') !== false);

        // Each family of replacements runs unless its "disable*" config flag is set
        if (empty($this->config['disableQuotes'])) {
            $this->parseSingleQuotes();
            $this->parseSingleQuotePairs();
            $this->parseDoubleQuotePairs();
        }
        if (empty($this->config['disableGuillemets'])) {
            $this->parseGuillemets();
        }
        if (empty($this->config['disableMathSymbols'])) {
            $this->parseNotEqualSign();
            $this->parseSymbolsAfterDigits();
            $this->parseFractions();
        }
        if (empty($this->config['disablePunctuation'])) {
            $this->parseDashesAndEllipses();
        }
        if (empty($this->config['disableSymbols'])) {
            $this->parseSymbolsInParentheses();
        }

        // Drop the reference to the text once every pass has run
        unset($this->text);
    }

    /**
     * Add a fancy replacement tag
     *
     * Creates a self-closing tag named after the "tagName" config value and stores the
     * replacement character in the attribute named by the "attrName" config value.
     *
     * @param  int    $tagPos Position of the tag in the text
     * @param  int    $tagLen Length of text consumed by the tag
     * @param  string $chr    Replacement character
     * @param  int    $prio   Tag's priority
     * @return \s9e\TextFormatter\Parser\Tag
     */
    protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
    {
        $tag = $this->parser->addSelfClosingTag($this->config['tagName'], $tagPos, $tagLen, $prio);
        $tag->setAttribute($this->config['attrName'], $chr);

        return $tag;
    }

    /**
     * Parse dashes and ellipses
     *
     * Does en dash –, em dash — and ellipsis …
     *
     * @return void
     */
    protected function parseDashesAndEllipses()
    {
        // Quick check: nothing to do unless the text contains "..." or "--"
        if (strpos($this->text, '...') === false && strpos($this->text, '--') === false) {
            return;
        }

        // Keyed by the exact text matched by the regexp below
        $chrs = [
            '--'  => "\xE2\x80\x93",
            '---' => "\xE2\x80\x94",
            '...' => "\xE2\x80\xA6"
        ];
        // Two or three hyphens, or exactly three dots
        $regexp = '/---?|\\.\\.\\./S';
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // $m[0] is the matched text, $m[1] its byte offset
            $this->addTag($m[1], strlen($m[0]), $chrs[$m[0]]);
        }
    }

    /**
     * Parse pairs of double quotes
     *
     * Does quote pairs “” -- must be done separately to handle nesting
     *
     * @return void
     */
    protected function parseDoubleQuotePairs()
    {
        if ($this->hasDoubleQuote) {
            // NOTE(review): everything after "/(?" in this call was stripped from the copy under
            // review (the "<!...>" span was removed). The lookbehind/lookahead below and the two
            // replacement characters (“ and ”, per the description above) are a reconstruction
            // that mirrors the single-quote variant -- confirm against version control.
            $this->parseQuotePairs(
                '/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS',
                "\xE2\x80\x9C",
                "\xE2\x80\x9D"
            );
        }
    }

    /**
     * Parse vulgar fractions
     *
     * Replaces the fractions listed in the map below (such as 1/2 or 3/4) with the
     * corresponding single character.
     *
     * NOTE(review): this method's header and the beginning of its guard clause were lost together
     * with the end of parseDoubleQuotePairs(); the name comes from the call made in parse() and
     * the guard follows the same shape as the other passes -- confirm against version control.
     *
     * @return void
     */
    protected function parseFractions()
    {
        // Quick check: every fraction contains a slash
        if (strpos($this->text, '/') === false) {
            return;
        }

        // Keyed by the exact text matched by the regexp below
        $map = [
            '1/4'  => "\xC2\xBC",
            '1/2'  => "\xC2\xBD",
            '3/4'  => "\xC2\xBE",
            '1/7'  => "\xE2\x85\x90",
            '1/9'  => "\xE2\x85\x91",
            '1/10' => "\xE2\x85\x92",
            '1/3'  => "\xE2\x85\x93",
            '2/3'  => "\xE2\x85\x94",
            '1/5'  => "\xE2\x85\x95",
            '2/5'  => "\xE2\x85\x96",
            '3/5'  => "\xE2\x85\x97",
            '4/5'  => "\xE2\x85\x98",
            '1/6'  => "\xE2\x85\x99",
            '5/6'  => "\xE2\x85\x9A",
            '1/8'  => "\xE2\x85\x9B",
            '3/8'  => "\xE2\x85\x9C",
            '5/8'  => "\xE2\x85\x9D",
            '7/8'  => "\xE2\x85\x9E",
            '0/3'  => "\xE2\x86\x89"
        ];

        // The word boundaries keep the pattern from matching inside longer numbers
        $regexp = '/\\b(?:0\\/3|1\\/(?:[2-9]|10)|2\\/[35]|3\\/[458]|4\\/5|5\\/[68]|7\\/8)\\b/S';
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            $this->addTag($m[1], strlen($m[0]), $map[$m[0]]);
        }
    }

    /**
     * Parse guillemets-style quotation marks
     *
     * Replaces << and >> around a single-line run of text with « and ». The optional space after
     * << is captured and must be mirrored before >>.
     *
     * @return void
     */
    protected function parseGuillemets()
    {
        // Quick check: nothing to do without an opening "<<"
        if (strpos($this->text, '<<') === false) {
            return;
        }

        // The copy under review had a line break inside the "(?! )" lookahead, which would have
        // made it test for a space followed by a newline; it is written here with a single space
        $regexp = '/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/';
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // One 2-byte tag over "<<" and one over ">>" at the end of the match
            $left  = $this->addTag($m[1], 2, "\xC2\xAB");
            $right = $this->addTag($m[1] + strlen($m[0]) - 2, 2, "\xC2\xBB");

            // If the left tag is invalidated, the right one goes with it
            $left->cascadeInvalidationTo($right);
        }
    }

    /**
     * Parse the not equal sign
     *
     * Supports != and =/=
     *
     * @return void
     */
    protected function parseNotEqualSign()
    {
        // Quick check: nothing to do unless the text contains "!=" or "=/="
        if (strpos($this->text, '!=') === false && strpos($this->text, '=/=') === false) {
            return;
        }

        // The sign must be preceded by a word character and a space, and followed by a space and
        // a word character
        $regexp = '/\\b (?:!|=\\/)=(?= \\b)/';
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // Skip the leading space that is part of the match
            $this->addTag($m[1] + 1, strlen($m[0]) - 1, "\xE2\x89\xA0");
        }
    }

    /**
     * Parse pairs of quotes
     *
     * @param  string $regexp     Regexp used to identify quote pairs
     * @param  string $leftQuote  Fancy replacement for left quote
     * @param  string $rightQuote Fancy replacement for right quote
     * @return void
     */
    protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
    {
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // The first and last bytes of the match are the two quote characters
            $left  = $this->addTag($m[1], 1, $leftQuote);
            $right = $this->addTag($m[1] + strlen($m[0]) - 1, 1, $rightQuote);

            // Cascade left tag's invalidation to the right so that if we skip the left quote,
            // the right quote remains untouched
            $left->cascadeInvalidationTo($right);
        }
    }

    /**
     * Parse pairs of single quotes
     *
     * Does quote pairs ‘’ must be done separately to handle nesting
     *
     * @return void
     */
    protected function parseSingleQuotePairs()
    {
        if ($this->hasSingleQuote) {
            // NOTE(review): everything after "/(?" in this call was stripped from the copy under
            // review (the "<!...>" span was removed). The lookbehind/lookahead below and the two
            // replacement characters (‘ and ’, per the description above) are a reconstruction
            // -- confirm against version control.
            $this->parseQuotePairs(
                "/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS",
                "\xE2\x80\x98",
                "\xE2\x80\x99"
            );
        }
    }

    /**
     * Parse single quotes in general
     *
     * Replaces a single quote that directly follows a letter with an apostrophe ’
     *
     * NOTE(review): this method's header, its doc comment and the second alternative of its
     * regexp were lost with the end of parseSingleQuotePairs(); the name comes from the call made
     * in parse(). The second alternative below (a quote that starts a word or a pair of digits)
     * is a reconstruction -- confirm against version control.
     *
     * @return void
     */
    protected function parseSingleQuotes()
    {
        if (!$this->hasSingleQuote) {
            return;
        }

        $regexp = "/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS";
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // Give this tag a worse priority than default so that quote pairs take precedence
            $this->addTag($m[1], 1, "\xE2\x80\x99", 10);
        }
    }

    /**
     * Parse symbols found after digits
     *
     * Does symbols found after a digit:
     *  - apostrophe ’ if it's followed by an "s" as in 80's
     *  - prime ′ and double prime ″
     *  - multiply sign × if it's followed by an optional space and another digit
     *
     * @return void
     */
    protected function parseSymbolsAfterDigits()
    {
        // Quick check: every supported sequence contains a quote character or an "x"
        if (!$this->hasSingleQuote && !$this->hasDoubleQuote && strpos($this->text, 'x') === false) {
            return;
        }

        // Keyed by the (up to) two characters that follow the digit
        $map = [
            // 80's -- use an apostrophe
            "'s" => "\xE2\x80\x99",
            // 12' or 12" -- use a prime
            "'"  => "\xE2\x80\xB2",
            "' " => "\xE2\x80\xB2",
            "'x" => "\xE2\x80\xB2",
            '"'  => "\xE2\x80\xB3",
            '" ' => "\xE2\x80\xB3",
            '"x' => "\xE2\x80\xB3"
        ];

        // A digit followed by: 's, or an optional quote and optional space before an "x" that
        // precedes another digit, or a lone quote
        $regexp = "/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S";
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // Test for a multiply sign at the end
            if (substr($m[0], -1) === 'x') {
                $this->addTag($m[1] + strlen($m[0]) - 1, 1, "\xC3\x97");
            }

            // Test for an apostrophe/prime right after the digit
            $str = substr($m[0], 1, 2);
            if (isset($map[$str])) {
                $this->addTag($m[1] + 1, 1, $map[$str]);
            }
        }
    }

    /**
     * Parse symbols found in parentheses such as (c)
     *
     * Does symbols ©, ® and ™
     *
     * @return void
     */
    protected function parseSymbolsInParentheses()
    {
        // Quick check: every supported sequence starts with a parenthesis
        if (strpos($this->text, '(') === false) {
            return;
        }

        // Keyed by the lowercased match
        $chrs = [
            '(c)'  => "\xC2\xA9",
            '(r)'  => "\xC2\xAE",
            '(tm)' => "\xE2\x84\xA2"
        ];
        $regexp = '/\\((?>c|r|tm)\\)/i';
        preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $m) {
            // The regexp is case-insensitive: lowercase the only letters it can match before the
            // lookup
            $this->addTag($m[1], strlen($m[0]), $chrs[strtr($m[0], 'CMRT', 'cmrt')]);
        }
    }
}