\strlen($def['regexp']), 'len' => 0, 'type' => 'end' ]; $patterns = []; $literal = ''; $pos = 0; $skipPos = 0; $depth = 0; foreach ($def['tokens'] as $token) { if ($token['type'] === 'option') $skipPos = \max($skipPos, $token['pos'] + $token['len']); if (\strpos($token['type'], 'AssertionStart') !== \false) { $endToken = $def['tokens'][$token['endToken']]; $skipPos = \max($skipPos, $endToken['pos'] + $endToken['len']); } if ($token['pos'] >= $skipPos) { if ($token['type'] === 'characterClass') $patterns[] = '[' . $token['content'] . ']'; if ($token['pos'] > $pos) { $tmp = \substr($def['regexp'], $pos, $token['pos'] - $pos); $literal .= $tmp; if (!$depth) { $tmp = \str_replace('\\\\', '', $tmp); if (\preg_match('/(? $m[1], 'modifiers' => $m[3], 'regexp' => $m[2], 'tokens' => [] ]; $regexp = $m[2]; $openSubpatterns = []; $pos = 0; $regexpLen = \strlen($regexp); while ($pos < $regexpLen) { switch ($regexp[$pos]) { case '\\': $pos += 2; break; case '[': if (!\preg_match('#\\[(.*?(? $pos, 'len' => \strlen($m[0]), 'type' => 'characterClass', 'content' => $m[1], 'quantifiers' => $m[2] ]; $pos += \strlen($m[0]); break; case '(': if (\preg_match('#\\(\\?([a-z]*)\\)#iA', $regexp, $m, 0, $pos)) { $ret['tokens'][] = [ 'pos' => $pos, 'len' => \strlen($m[0]), 'type' => 'option', 'options' => $m[1] ]; $pos += \strlen($m[0]); break; } if (\preg_match("#(?J)\\(\\?(?:P?<(?[a-z_0-9]+)>|'(?[a-z_0-9]+)')#A", $regexp, $m, \PREG_OFFSET_CAPTURE, $pos)) { $tok = [ 'pos' => $pos, 'len' => \strlen($m[0][0]), 'type' => 'capturingSubpatternStart', 'name' => $m['name'][0] ]; $pos += \strlen($m[0][0]); } elseif (\preg_match('#\\(\\?([a-z]*):#iA', $regexp, $m, 0, $pos)) { $tok = [ 'pos' => $pos, 'len' => \strlen($m[0]), 'type' => 'nonCapturingSubpatternStart', 'options' => $m[1] ]; $pos += \strlen($m[0]); } elseif (\preg_match('#\\(\\?>#iA', $regexp, $m, 0, $pos)) { $tok = [ 'pos' => $pos, 'len' => \strlen($m[0]), 'type' => 'nonCapturingSubpatternStart', 'subtype' => 'atomic' ]; $pos += \strlen($m[0]); } elseif (\preg_match('#\\(\\?( 'lookahead', '<=' => 'lookbehind', '!' => 'negativeLookahead', ' 'negativeLookbehind' ]; $tok = [ 'pos' => $pos, 'len' => \strlen($m[0]), 'type' => $assertions[$m[1]] . 'AssertionStart' ]; $pos += \strlen($m[0]); } elseif (\preg_match('#\\(\\?#A', $regexp, $m, 0, $pos)) throw new RuntimeException('Unsupported subpattern type at pos ' . $pos); else { $tok = [ 'pos' => $pos, 'len' => 1, 'type' => 'capturingSubpatternStart' ]; ++$pos; } $openSubpatterns[] = \count($ret['tokens']); $ret['tokens'][] = $tok; break; case ')': if (empty($openSubpatterns)) throw new RuntimeException('Could not find matching pattern start for right parenthesis at pos ' . $pos); $k = \array_pop($openSubpatterns); $startToken =& $ret['tokens'][$k]; $startToken['endToken'] = \count($ret['tokens']); $startToken['content'] = \substr( $regexp, $startToken['pos'] + $startToken['len'], $pos - ($startToken['pos'] + $startToken['len']) ); $spn = \strspn($regexp, '+*?', 1 + $pos); $quantifiers = \substr($regexp, 1 + $pos, $spn); $ret['tokens'][] = [ 'pos' => $pos, 'len' => 1 + $spn, 'type' => \substr($startToken['type'], 0, -5) . 'End', 'quantifiers' => $quantifiers ]; unset($startToken); $pos += 1 + $spn; break; default: ++$pos; } } if (!empty($openSubpatterns)) throw new RuntimeException('Could not find matching pattern end for left parenthesis at pos ' . $ret['tokens'][$openSubpatterns[0]]['pos']); return $ret; } }