[ Index ]

PHP Cross Reference of phpBB-3.3.14-deutsch

title

Body

[close]

/vendor/s9e/text-formatter/src/Plugins/FancyPants/ -> Parser.php (source)

   1  <?php
   2  
   3  /**
   4  * @package   s9e\TextFormatter
   5  * @copyright Copyright (c) 2010-2022 The s9e authors
   6  * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
   7  */
   8  namespace s9e\TextFormatter\Plugins\FancyPants;
   9  
  10  use s9e\TextFormatter\Plugins\ParserBase;
  11  
  12  class Parser extends ParserBase
  13  {
    /**
    * @var bool Whether the current text contains a double quote character. Set by parse()
    */
    protected $hasDoubleQuote;

    /**
    * @var bool Whether the current text contains a single quote character. Set by parse()
    */
    protected $hasSingleQuote;

    /**
    * @var string Text being parsed. Set at the start of parse() and unset at the end of it
    */
    protected $text;
  28  
  29      /**
  30      * {@inheritdoc}
  31      */
  32  	public function parse($text, array $matches)
  33      {
  34          $this->text           = $text;
  35          $this->hasSingleQuote = (strpos($text, "'") !== false);
  36          $this->hasDoubleQuote = (strpos($text, '"') !== false);
  37  
  38          if (empty($this->config['disableQuotes']))
  39          {
  40              $this->parseSingleQuotes();
  41              $this->parseSingleQuotePairs();
  42              $this->parseDoubleQuotePairs();
  43          }
  44          if (empty($this->config['disableGuillemets']))
  45          {
  46              $this->parseGuillemets();
  47          }
  48          if (empty($this->config['disableMathSymbols']))
  49          {
  50              $this->parseNotEqualSign();
  51              $this->parseSymbolsAfterDigits();
  52              $this->parseFractions();
  53          }
  54          if (empty($this->config['disablePunctuation']))
  55          {
  56              $this->parseDashesAndEllipses();
  57          }
  58          if (empty($this->config['disableSymbols']))
  59          {
  60              $this->parseSymbolsInParentheses();
  61          }
  62  
  63          unset($this->text);
  64      }
  65  
  66      /**
  67      * Add a fancy replacement tag
  68      *
  69      * @param  integer $tagPos Position of the tag in the text
  70      * @param  integer $tagLen Length of text consumed by the tag
  71      * @param  string  $chr    Replacement character
  72      * @param  integer $prio   Tag's priority
  73      * @return \s9e\TextFormatter\Parser\Tag
  74      */
  75  	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
  76      {
  77          $tag = $this->parser->addSelfClosingTag($this->config['tagName'], $tagPos, $tagLen, $prio);
  78          $tag->setAttribute($this->config['attrName'], $chr);
  79  
  80          return $tag;
  81      }
  82  
  83      /**
  84      * Parse dashes and ellipses
  85      *
  86      * Does en dash –, em dash — and ellipsis …
  87      *
  88      * @return void
  89      */
  90  	protected function parseDashesAndEllipses()
  91      {
  92          if (strpos($this->text, '...') === false && strpos($this->text, '--') === false)
  93          {
  94              return;
  95          }
  96  
  97          $chrs = [
  98              '--'  => "\xE2\x80\x93",
  99              '---' => "\xE2\x80\x94",
 100              '...' => "\xE2\x80\xA6"
 101          ];
 102          $regexp = '/---?|\\.\\.\\./S';
 103          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 104          foreach ($matches[0] as $m)
 105          {
 106              $this->addTag($m[1], strlen($m[0]), $chrs[$m[0]]);
 107          }
 108      }
 109  
 110      /**
 111      * Parse pairs of double quotes
 112      *
 113      * Does quote pairs “” -- must be done separately to handle nesting
 114      *
 115      * @return void
 116      */
 117  	protected function parseDoubleQuotePairs()
 118      {
 119          if ($this->hasDoubleQuote)
 120          {
 121              $this->parseQuotePairs(
 122                  '/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS',
 123                  "\xE2\x80\x9C",
 124                  "\xE2\x80\x9D"
 125              );
 126          }
 127      }
 128  
 129      /**
 130      * Parse vulgar fractions
 131      *
 132      * @return void
 133      */
 134  	protected function parseFractions()
 135      {
 136          if (strpos($this->text, '/') === false)
 137          {
 138              return;
 139          }
 140  
 141          $map = [
 142              '1/4'  => "\xC2\xBC",
 143              '1/2'  => "\xC2\xBD",
 144              '3/4'  => "\xC2\xBE",
 145              '1/7'  => "\xE2\x85\x90",
 146              '1/9'  => "\xE2\x85\x91",
 147              '1/10' => "\xE2\x85\x92",
 148              '1/3'  => "\xE2\x85\x93",
 149              '2/3'  => "\xE2\x85\x94",
 150              '1/5'  => "\xE2\x85\x95",
 151              '2/5'  => "\xE2\x85\x96",
 152              '3/5'  => "\xE2\x85\x97",
 153              '4/5'  => "\xE2\x85\x98",
 154              '1/6'  => "\xE2\x85\x99",
 155              '5/6'  => "\xE2\x85\x9A",
 156              '1/8'  => "\xE2\x85\x9B",
 157              '3/8'  => "\xE2\x85\x9C",
 158              '5/8'  => "\xE2\x85\x9D",
 159              '7/8'  => "\xE2\x85\x9E",
 160              '0/3'  => "\xE2\x86\x89"
 161          ];
 162  
 163          $regexp = '/\\b(?:0\\/3|1\\/(?:[2-9]|10)|2\\/[35]|3\\/[458]|4\\/5|5\\/[68]|7\\/8)\\b/S';
 164          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 165          foreach ($matches[0] as $m)
 166          {
 167              $this->addTag($m[1], strlen($m[0]), $map[$m[0]]);
 168          }
 169      }
 170  
 171      /**
 172      * Parse guillemets-style quotation marks
 173      *
 174      * @return void
 175      */
 176  	protected function parseGuillemets()
 177      {
 178          if (strpos($this->text, '<<') === false)
 179          {
 180              return;
 181          }
 182  
 183          $regexp = '/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/';
 184          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 185          foreach ($matches[0] as $m)
 186          {
 187              $left  = $this->addTag($m[1],                     2, "\xC2\xAB");
 188              $right = $this->addTag($m[1] + strlen($m[0]) - 2, 2, "\xC2\xBB");
 189  
 190              $left->cascadeInvalidationTo($right);
 191          }
 192      }
 193  
 194      /**
 195      * Parse the not equal sign
 196      *
 197      * Supports != and =/=
 198      *
 199      * @return void
 200      */
 201  	protected function parseNotEqualSign()
 202      {
 203          if (strpos($this->text, '!=') === false && strpos($this->text, '=/=') === false)
 204          {
 205              return;
 206          }
 207  
 208          $regexp = '/\\b (?:!|=\\/)=(?= \\b)/';
 209          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 210          foreach ($matches[0] as $m)
 211          {
 212              $this->addTag($m[1] + 1, strlen($m[0]) - 1, "\xE2\x89\xA0");
 213          }
 214      }
 215  
 216      /**
 217      * Parse pairs of quotes
 218      *
 219      * @param  string $regexp     Regexp used to identify quote pairs
 220      * @param  string $leftQuote  Fancy replacement for left quote
 221      * @param  string $rightQuote Fancy replacement for right quote
 222      * @return void
 223      */
 224  	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
 225      {
 226          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 227          foreach ($matches[0] as $m)
 228          {
 229              $left  = $this->addTag($m[1], 1, $leftQuote);
 230              $right = $this->addTag($m[1] + strlen($m[0]) - 1, 1, $rightQuote);
 231  
 232              // Cascade left tag's invalidation to the right so that if we skip the left quote,
 233              // the right quote remains untouched
 234              $left->cascadeInvalidationTo($right);
 235          }
 236      }
 237  
 238      /**
 239      * Parse pairs of single quotes
 240      *
 241      * Does quote pairs ‘’ must be done separately to handle nesting
 242      *
 243      * @return void
 244      */
 245  	protected function parseSingleQuotePairs()
 246      {
 247          if ($this->hasSingleQuote)
 248          {
 249              $this->parseQuotePairs(
 250                  "/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS",
 251                  "\xE2\x80\x98",
 252                  "\xE2\x80\x99"
 253              );
 254          }
 255      }
 256  
 257      /**
 258      * Parse single quotes in general
 259      *
 260      * Does apostrophes ’ after a letter or at the beginning of a word or a couple of digits
 261      *
 262      * @return void
 263      */
 264  	protected function parseSingleQuotes()
 265      {
 266          if (!$this->hasSingleQuote)
 267          {
 268              return;
 269          }
 270  
 271          $regexp = "/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS";
 272          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 273          foreach ($matches[0] as $m)
 274          {
 275              // Give this tag a worse priority than default so that quote pairs take precedence
 276              $this->addTag($m[1], 1, "\xE2\x80\x99", 10);
 277          }
 278      }
 279  
 280      /**
 281      * Parse symbols found after digits
 282      *
 283      * Does symbols found after a digit:
 284      *  - apostrophe ’ if it's followed by an "s" as in 80's
 285      *  - prime ′ and double prime ″
 286      *  - multiply sign × if it's followed by an optional space and another digit
 287      *
 288      * @return void
 289      */
 290  	protected function parseSymbolsAfterDigits()
 291      {
 292          if (!$this->hasSingleQuote && !$this->hasDoubleQuote && strpos($this->text, 'x') === false)
 293          {
 294              return;
 295          }
 296  
 297          $map = [
 298              // 80's -- use an apostrophe
 299              "'s" => "\xE2\x80\x99",
 300              // 12' or 12" -- use a prime
 301              "'"  => "\xE2\x80\xB2",
 302              "' " => "\xE2\x80\xB2",
 303              "'x" => "\xE2\x80\xB2",
 304              '"'  => "\xE2\x80\xB3",
 305              '" ' => "\xE2\x80\xB3",
 306              '"x' => "\xE2\x80\xB3"
 307          ];
 308  
 309          $regexp = "/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S";
 310          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 311          foreach ($matches[0] as $m)
 312          {
 313              // Test for a multiply sign at the end
 314              if (substr($m[0], -1) === 'x')
 315              {
 316                  $this->addTag($m[1] + strlen($m[0]) - 1, 1, "\xC3\x97");
 317              }
 318  
 319              // Test for an apostrophe/prime right after the digit
 320              $str = substr($m[0], 1, 2);
 321              if (isset($map[$str]))
 322              {
 323                  $this->addTag($m[1] + 1, 1, $map[$str]);
 324              }
 325          }
 326      }
 327  
 328      /**
 329      * Parse symbols found in parentheses such as (c)
 330      *
 331      * Does symbols ©, ® and ™
 332      *
 333      * @return void
 334      */
 335  	protected function parseSymbolsInParentheses()
 336      {
 337          if (strpos($this->text, '(') === false)
 338          {
 339              return;
 340          }
 341  
 342          $chrs = [
 343              '(c)'  => "\xC2\xA9",
 344              '(r)'  => "\xC2\xAE",
 345              '(tm)' => "\xE2\x84\xA2"
 346          ];
 347          $regexp = '/\\((?>c|r|tm)\\)/i';
 348          preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
 349          foreach ($matches[0] as $m)
 350          {
 351              $this->addTag($m[1], strlen($m[0]), $chrs[strtr($m[0], 'CMRT', 'cmrt')]);
 352          }
 353      }
 354  }


Generated: Mon Nov 25 19:05:08 2024 Cross-referenced by PHPXref 0.7.1