<param/> descendants of <object/> and all attributes of <embed/> whose name
     * matches given regexp. This method will NOT catch <param/> elements whose 'name' attribute is
     * set via an <xsl:attribute/>
     *
     * The returned list is heterogeneous: it can contain attribute nodes that belong to an
     * <embed/> element, nodes from getAttributesByRegexp() that are not attribute nodes but have
     * an <embed/> ancestor, and <param/> elements.
     *
     * @param  DOMDocument $dom    Document
     * @param  string      $regexp Regexp matched against the attribute/param name
     * @return DOMNode[]           List of DOMNode instances
     */
    public static function getObjectParamsByRegexp(DOMDocument $dom, $regexp)
    {
        $xpath = new DOMXPath($dom);
        $nodes = [];

        // Collect attributes from <embed/> elements
        foreach (self::getAttributesByRegexp($dom, $regexp) as $attribute) {
            if ($attribute->nodeType === XML_ATTRIBUTE_NODE) {
                // Literal attribute: keep it only if it sits directly on an <embed/>
                if (strtolower($attribute->parentNode->localName) === 'embed') {
                    $nodes[] = $attribute;
                }
            } elseif ($xpath->evaluate('count(ancestor::embed)', $attribute)) {
                // Assuming <xsl:attribute/> or <xsl:copy-of/>
                $nodes[] = $attribute;
            }
        }

        // Collect <param/> descendants of <object/> elements
        foreach ($xpath->query('//object//param') as $param) {
            // Only the literal 'name' attribute is read here
            if (preg_match($regexp, $param->getAttribute('name'))) {
                $nodes[] = $param;
            }
        }

        return $nodes;
    }

    /**
     * Return all DOMNodes whose content is an URL
     *
     * NOTE: it will also return HTML4 nodes whose content is an URI
     *
     * For <param/> elements named "dataurl" or "movie", the node returned is the element's
     * "value" attribute. Matching <embed/> attributes (literal or generated) are returned as-is.
     *
     * @param  DOMDocument $dom Document
     * @return DOMNode[]        List of DOMNode instances
     */
    public static function getURLNodes(DOMDocument $dom)
    {
        $regexp = '/(?:^(?:action|background|c(?:ite|lassid|odebase)|data|formaction|href|i(?:con|tem(?:id|prop|type))|longdesc|manifest|p(?:ing|luginspage|oster|rofile)|usemap)|src)$/i';
        $nodes  = self::getAttributesByRegexp($dom, $regexp);

        /**
         * @link http://helpx.adobe.com/flash/kb/object-tag-syntax-flash-professional.html
         * @link http://www.sitepoint.com/control-internet-explorer/
         */
        foreach (self::getObjectParamsByRegexp($dom, '/^(?:dataurl|movie)$/i') as $node) {
            if ($node->nodeType === XML_ELEMENT_NODE && $node->nodeName === 'param') {
                // Literal <param/>: the URL is stored in its "value" attribute, if it has one
                $value = $node->getAttributeNode('value');
                if ($value) {
                    $nodes[] = $value;
                }
            } else {
                // getObjectParamsByRegexp() also returns <embed/> attributes. Those are not
                // elements with a "value" attribute (a DOMAttr has no getAttributeNode() method
                // at all), so the node itself is the one that carries the URL
                $nodes[] = $node;
            }
        }

        return $nodes;
    }

    /**
     * Return all nodes of given type, each paired with the name it has or generates
     *
     * Three sources are scanned: nodes literally present in the document, <xsl:element/> or
     * <xsl:attribute/> instructions (using their literal 'name' attribute) and <xsl:copy-of/>
     * instructions whose 'select' is a bare name made of word characters.
     *
     * @param  DOMDocument $dom  Owner document
     * @param  string      $type Node type ('element' or 'attribute')
     * @return array[]           List of [DOMNode, string] pairs: the node and its name
     */
    protected static function getNodes(DOMDocument $dom, $type)
    {
        $nodes  = [];
        $prefix = ($type === 'attribute') ? '@' : '';
        $xpath  = new DOMXPath($dom);

        // Get natural nodes
        foreach ($xpath->query('//' . $prefix . '*') as $node) {
            $nodes[] = [$node, $node->nodeName];
        }

        // Get XSL-generated nodes
        foreach ($xpath->query('//xsl:' . $type) as $node) {
            $nodes[] = [$node, $node->getAttribute('name')];
        }

        // Get xsl:copy-of nodes
        foreach ($xpath->query('//xsl:copy-of') as $node) {
            // Only a plain name is recognized, e.g. "@foo" for attributes or "foo" for elements
            if (preg_match('/^' . $prefix . '(\\w+)$/', $node->getAttribute('select'), $m)) {
                $nodes[] = [$node, $m[1]];
            }
        }

        return $nodes;
    }

    /**
     * Return all nodes (literal or generated) that match given regexp
     *
     * @param  DOMDocument $dom    Owner document
     * @param  string      $regexp Regexp matched against the name of each node
     * @param  string      $type   Node type ('element' or 'attribute')
     * @return DOMNode[]           List of DOMNode instances
     */
    protected static function getNodesByRegexp(DOMDocument $dom, $regexp, $type)
    {
        $nodes = [];
        foreach (self::getNodes($dom, $type) as list($node, $name)) {
            if (preg_match($regexp, $name)) {
                $nodes[] = $node;
            }
        }

        return $nodes;
    }
}