value] */
    protected $collection;

    /**
     * @var boolean Whether to capture only the first occurrence of each keyword
     */
    public $onlyFirst = false;

    /**
     * @var string Name of the tag used by this plugin
     */
    protected $tagName = 'KEYWORD';

    /**
     * {@inheritdoc}
     *
     * Creates an empty keyword collection and registers this plugin's tag, along with its
     * attribute, on the configurator.
     */
    protected function setUp()
    {
        $this->collection = new NormalizedList;

        $this->configurator->tags->add($this->tagName)->attributes->add($this->attrName);
    }

    /**
     * {@inheritdoc}
     *
     * Builds the plugin's configuration from the keyword collection.
     *
     * The keywords are deduplicated, sorted, split into chunks of roughly 30KB and each chunk is
     * turned into one regexp delimited by word boundaries.
     *
     * @return array|null Array with the keys "attrName", "tagName", "regexps" and, when enabled,
     *                    "onlyFirst"; or NULL if the collection is empty
     */
    public function asConfig()
    {
        if (!count($this->collection)) {
            return;
        }

        $config = [
            'attrName' => $this->attrName,
            'tagName'  => $this->tagName
        ];

        if (!empty($this->onlyFirst)) {
            $config['onlyFirst'] = $this->onlyFirst;
        }

        // Sort keywords in order to keep keywords that start with the same characters together. We
        // also remove duplicates that would otherwise skew the length computation done below.
        //
        // SORT_STRING is required here: the default SORT_REGULAR compares numeric strings as
        // numbers and everything else bytewise, which is not a consistent ordering (e.g. "2" < "10"
        // but "10" < "1a" < "2") and would make the chunk boundaries depend on insertion order
        $keywords = array_unique(iterator_to_array($this->collection));
        sort($keywords, SORT_STRING);

        // Group keywords by chunks of ~30KB to remain below PCRE's limit
        $groups   = [];
        $groupKey = 0;
        $groupLen = 0;
        foreach ($keywords as $keyword) {
            // NOTE: the value 4 is a guesstimate for the cost of each alternation
            $keywordLen  = 4 + strlen($keyword);
            $groupLen   += $keywordLen;

            if ($groupLen > 30000) {
                // This keyword would overflow the current chunk, start a new one with it
                $groupLen = $keywordLen;
                ++$groupKey;
            }

            $groups[$groupKey][] = $keyword;
        }

        // The builder options are the same for every chunk
        $builderOptions = ['caseInsensitive' => !$this->caseSensitive];

        foreach ($groups as $groupKeywords) {
            $regexp = RegexpBuilder::fromList($groupKeywords, $builderOptions);
            $regexp = '/\\b' . $regexp . '\\b/S';

            if (!$this->caseSensitive) {
                $regexp .= 'i';
            }

            // Only enable Unicode mode if the regexp contains non-ASCII characters
            if (preg_match('/[^[:ascii:]]/', $regexp)) {
                $regexp .= 'u';
            }

            $config['regexps'][] = new Regexp($regexp, true);
        }

        return $config;
    }
}