descendants of <object> and all attributes of <embed> whose name
* matches given regexp. This method will NOT catch <param> elements whose 'name' attribute is
* set via an <xsl:attribute>
*
* @param DOMDocument $dom Document
* @param string $regexp
* @return DOMNode[] List of DOMNode instances
*/
public static function getObjectParamsByRegexp(DOMDocument $dom, $regexp)
{
	$xpath   = new DOMXPath($dom);
	$matches = [];

	// First pass: attribute-like nodes whose name matches and that belong to an <embed>
	foreach (self::getAttributesByRegexp($dom, $regexp) as $candidate)
	{
		if ($candidate->nodeType === XML_ATTRIBUTE_NODE)
		{
			// Literal attribute: its owner element must be an <embed> (case-insensitive)
			$belongsToEmbed = (strtolower($candidate->parentNode->localName) === 'embed');
		}
		else
		{
			// Not a literal attribute, presumably an <xsl:attribute> or <xsl:copy-of>
			// element: keep it if it sits anywhere below an <embed>
			$belongsToEmbed = (bool) $xpath->evaluate('count(ancestor::embed)', $candidate);
		}

		if ($belongsToEmbed)
		{
			$matches[] = $candidate;
		}
	}

	// Second pass: <param> descendants of <object> whose literal 'name' attribute matches
	foreach ($xpath->query('//object//param') as $paramElement)
	{
		$paramName = $paramElement->getAttribute('name');
		if (preg_match($regexp, $paramName))
		{
			$matches[] = $paramElement;
		}
	}

	return $matches;
}
/**
* Return all DOMNodes whose content is an URL
*
* NOTE: it will also return HTML4 nodes whose content is an URI
*
* The result is made of the nodes returned by getAttributesByRegexp() for the list of
* URL-bearing attribute names, followed by the URL-bearing nodes found via
* getObjectParamsByRegexp() for the names "dataurl" and "movie":
*  - for a <param> element, its 'value' attribute (if it has one)
*  - for a literal attribute of an <embed> element, the attribute itself
*
* @param DOMDocument $dom Document
* @return DOMNode[] List of DOMNode instances
*/
public static function getURLNodes(DOMDocument $dom)
{
	$regexp = '/(?:^(?:action|background|c(?:ite|lassid|odebase)|data|formaction|href|i(?:con|tem(?:id|prop|type))|longdesc|manifest|p(?:ing|luginspage|oster|rofile)|usemap)|src)$/i';
	$nodes  = self::getAttributesByRegexp($dom, $regexp);

	/**
	* @link http://helpx.adobe.com/flash/kb/object-tag-syntax-flash-professional.html
	* @link http://www.sitepoint.com/control-internet-explorer/
	*/
	foreach (self::getObjectParamsByRegexp($dom, '/^(?:dataurl|movie)$/i') as $param)
	{
		if ($param->nodeType === XML_ATTRIBUTE_NODE)
		{
			// getObjectParamsByRegexp() also returns literal attributes of <embed> elements.
			// Those are DOMAttr instances, which have no getAttributeNode() method, so
			// calling it would be a fatal error. The attribute itself holds the URL
			$nodes[] = $param;

			continue;
		}

		// Element node: for a <param>, the URL is stored in its 'value' attribute.
		// NOTE(review): XSL elements located under an <embed> have no 'value' attribute and
		// are skipped here, as they always were -- confirm whether they should be returned
		$node = $param->getAttributeNode('value');
		if ($node)
		{
			$nodes[] = $node;
		}
	}

	return $nodes;
}
/**
* Collect every node of given type, literal or XSL-generated, together with its name
*
* Three sources are scanned, in this order:
*  - literal nodes present in the document, paired with their node name
*  - <xsl:element> or <xsl:attribute> elements, paired with the value of their 'name' attribute
*  - <xsl:copy-of> elements whose 'select' attribute is a single word (prefixed with "@" for
*    attributes), paired with that word
*
* @param DOMDocument $dom Owner document
* @param string $type Node type ('element' or 'attribute')
* @return array[] List of [DOMNode, string] pairs: the node and the name associated with it
*/
protected static function getNodes(DOMDocument $dom, $type)
{
	$xpath = new DOMXPath($dom);

	// Attributes are addressed with a leading "@" in XPath, elements with nothing
	if ($type === 'attribute')
	{
		$prefix = '@';
	}
	else
	{
		$prefix = '';
	}

	$pairs = [];

	// Literal nodes, named after themselves
	$literalNodes = $xpath->query('//' . $prefix . '*');
	foreach ($literalNodes as $literalNode)
	{
		$pairs[] = [$literalNode, $literalNode->nodeName];
	}

	// Nodes generated by <xsl:element> or <xsl:attribute>, named via their 'name' attribute
	$generators = $xpath->query('//xsl:' . $type);
	foreach ($generators as $generator)
	{
		$pairs[] = [$generator, $generator->getAttribute('name')];
	}

	// <xsl:copy-of> whose select expression is nothing more than a plain name
	$selectRegexp = '/^' . $prefix . '(\\w+)$/';
	foreach ($xpath->query('//xsl:copy-of') as $copyOf)
	{
		$select = $copyOf->getAttribute('select');
		if (preg_match($selectRegexp, $select, $m))
		{
			$pairs[] = [$copyOf, $m[1]];
		}
	}

	return $pairs;
}
/**
* Filter the nodes of given type (literal or generated) by name
*
* Each [node, name] pair produced by getNodes() is tested against the regexp; only the
* nodes whose associated name matches are kept, in the order getNodes() returned them.
*
* @param DOMDocument $dom Owner document
* @param string $regexp Regexp applied to each node's name
* @param string $type Node type ('element' or 'attribute')
* @return DOMNode[] List of DOMNode instances
*/
protected static function getNodesByRegexp(DOMDocument $dom, $regexp, $type)
{
	$matches = [];
	foreach (self::getNodes($dom, $type) as $pair)
	{
		// $pair[0] is the node, $pair[1] the name it is known by
		if (!preg_match($regexp, $pair[1]))
		{
			continue;
		}

		$matches[] = $pair[0];
	}

	return $matches;
}
}