Skip to content
Advertisement

DOMDocument – get script text from within body

What I am trying to do is get the scripts from the body tag, but only the scripts that contain inline text, not the scripts that link to a file.

eg. <script type="text/javascript">console.log("for a test run");</script>

not the scripts that have file src.

And I want to place those scripts to end of page before </body>.

So far I have

        // First attempt: collect inline scripts found directly under <body> and
        // re-append them at the end of <body>.
        // NOTE: $dom, $omit, $moveme, $pretty, $newline, $xhtml and $sanitize are
        // defined outside this fragment.
        echo "<pre>";
        echo "reaches 1 <br />";
        //work for inpage scripts
        $mainBody = @$dom->getElementsByTagName('body')->item(0);
        foreach (@$dom->getElementsByTagName('body') as $head) {
            echo "reaches 2";

            // childNodes holds only the DIRECT children of <body>; scripts nested
            // inside divs or other elements are never visited by this loop.
            foreach (@$head->childNodes as $node) {

                echo "reaches 3";
                var_dump($node);
                // A comment whose text contains "<script" (e.g. a conditional
                // comment) is treated as a script candidate.
                if ($node instanceof DOMComment) {
                    if (preg_match('/<script/i', $node->nodeValue)){
                        $src = $node->nodeValue;
                        echo "its a node";
                        var_dump($node);
                    }
                }
                // Only <script type="text/javascript"> elements are considered.
                if ($node->nodeName == 'script' && $node->attributes->getNamedItem('type')->nodeValue == 'text/javascript') {
                    // $src becomes the src attribute when present, otherwise the
                    // inline script text; it is what the omit patterns are tested on.
                    if (@$src = $node->attributes->getNamedItem('src')->nodeValue) {
                        // yay - $src was true, so we don't do anything here
                    } else {
                        $src = $node->nodeValue;
                    }
                    echo "its a node2";
                    var_dump($node);
                }
                if (isset($src)) {
                    // The 'exclude' param sets the default decision; a matching
                    // omit pattern flips that decision for this node.
                    $move = ($this->params->get('exclude')) ? true : false;
                    foreach ($omit as $omitit) {
                        if (preg_match($omitit, $src) == 1) {
                            $move = ($this->params->get('exclude')) ? false : true;
                            break;
                        }
                    }
                    if ($move)
                        $moveme[] = $node;
                    // Reset so the next node does not inherit this node's $src.
                    unset($src);
                }
            }
        }
        // Append a deep copy of every collected node to the end of <body>, then
        // remove the original from its old position.
        foreach ($moveme as $moveit) {
            echo "Moving";
            print_r($moveit);
            $mainBody->appendChild($moveit->cloneNode(true));
            if ($pretty) {
                $mainBody->appendChild($newline->cloneNode(false));
            }
            $moveit->parentNode->removeChild($moveit);
        }
// From here on $mainBody holds the serialized document string, not the <body> node.
$mainBody = $xhtml ? $dom->saveXML() : $dom->saveHTML();

        // When $sanitize is set, the search/replace pairs stored in
        // $this->sanitizews are applied to the markup before it is sent.
        JResponse::setBody($sanitize?preg_replace($this->sanitizews['search'],$this->sanitizews['replace'],$mainBody):$mainBody);

Update 1

The problem is that <script type="text/javascript"> can also be inside a div, or inside nested divs. Iterating with foreach over @$head->childNodes only visits the top-level HTML tags and does not scan the inner tags that may contain <script> tags. I don't understand how to get all the required script tags.

There is no error, but there are also no script tags among the top-level nodes.

Update 2

After the XPath answer (thanks for the answer), there is some progress on the task. But now, after moving the scripts to the footer, I can't delete/remove the original script tags.

Here is the updated code I have so far:

        // Second attempt: use XPath to find every <script> without a src
        // attribute anywhere below <body>, at any nesting depth.
        // NOTE: $dom and $moveme are defined outside this fragment.
        echo "<pre>3";
//        echo "reaches 1 <br />";
        //work for inpage scripts
        $xpath = new DOMXPath($dom);
        $script_tags = $xpath->query('//body//script[not(@src)]');

        // Copy the matches into a plain array before the tree is modified.
        foreach ($script_tags as $tag) {
//            var_dump($tag->nodeValue);
            $moveme[] = $tag;
        }
        $mainBody = @$dom->getElementsByTagName('body')->item(0);
        foreach ($moveme as $moveItScript) {

            print_r($moveItScript->cloneNode(true));
            // Only a deep COPY is appended here; because the removeChild() call
            // below is commented out, the original script stays where it was.
            $mainBody->appendChild($moveItScript->cloneNode(true));
//            var_dump($moveItScript->parentNode);
//            $moveItScript->parentNode->removeChild($moveItScript);
/*            try{
                $mainBody->appendChild($moveit->cloneNode(true));
                if ($pretty) {
                    $body->appendChild($newline->cloneNode(false));
                }
                $moveit->parentNode->removeChild($moveit);
            }catch (Exception $ex){
                var_dump($ex);
            }*/
        }
        echo "</pre>";

Update 3

I was working on a Joomla site and trying to move scripts to the footer of the page. I had used the scriptsdown plugin, which moves the scripts from the head tag to the bottom, but the scripts in the middle of the page were not moved to the bottom, and that was what caused the in-page scripts to not respond properly.

My problem is now solved. Posting my solution code so if it might help someone in future.

/**
 * Joomla onAfterRender handler: relocates JavaScript towards the end of the page.
 *
 * Steps, in order:
 *  1. Bail out for administrator pages and non-HTML documents.
 *  2. Parse the rendered response body into a DOMDocument (XML parser when the
 *     markup mentions "XHTML", HTML parser otherwise).
 *  3. Optionally sanitize <style> elements and strip comments.
 *  4. Move <script type="text/javascript"> elements (and comments containing
 *     "<script") found directly in <head> into the first <footer>, falling
 *     back to <body> when the page has no <footer>.
 *  5. Move every inline (src-less) <script> inside <body> to the end of <body>.
 *  6. Serialize the document and write it back as the response body.
 *
 * Plugin parameters read: pretty, stripcomments, sanitize, omit, exclude.
 * Relies on $this->_sanitizeCSS(), $this->_domComments() and $this->sanitizews,
 * which are defined elsewhere in the plugin class.
 *
 * @return void
 */
function onAfterRender() {
    $app = JFactory::getApplication();
    $doc = JFactory::getDocument();

    /* Only frontend pages rendered as HTML are processed. */
    if ($app->isAdmin() || $doc->getType() != 'html') {
        return;
    }

    $pretty        = (int) $this->params->get('pretty', 0);
    $stripcomments = (int) $this->params->get('stripcomments', 0);
    $sanitize      = (int) $this->params->get('sanitize', 0);
    $debug         = (int) $app->getCfg('debug', 0);
    $exclude       = (bool) $this->params->get('exclude');
    if ($debug) {
        $pretty = true;
    }

    /* Build one case-insensitive regex per non-empty line of the "omit" param.
     * Blank lines are skipped: they would yield the pattern "//i", which
     * matches every script. */
    $omit = array();
    foreach (explode("\n", (string) $this->params->get('omit')) as $omitme) {
        $omitme = trim($omitme);
        if ($omitme === '') {
            continue;
        }
        /* Escape the regex delimiter and single quotes inside the pattern. */
        $omit[] = '/' . str_replace(array('/', '\''), array('\/', '\\\''), $omitme) . '/i';
    }

    $source = JResponse::getBody();
    if (!is_string($source) || $source === '') {
        /* Nothing to parse; leave the response untouched. */
        return;
    }

    $dom = new DOMDocument();
    $dom->recover = true;
    $dom->substituteEntities = true;
    if ($pretty) {
        $dom->formatOutput = true;
    } else {
        $dom->preserveWhiteSpace = false;
    }

    /* DOMDocument is noisy on malformed markup, so error reporting is muted
     * while loading (unless debugging) and restored right afterwards. */
    $xhtml = (bool) preg_match('/XHTML/', $source);
    if (!$debug) {
        $erlevel = error_reporting(0);
    }
    $loaded = $xhtml ? $dom->loadXML($source) : $dom->loadHTML($source);
    if (!$debug) {
        error_reporting($erlevel);
    }
    if (!$loaded) {
        /* Parsing failed; do not overwrite the response with an empty document. */
        return;
    }

    $newline = $pretty ? $dom->createTextNode("\n") : null;

    if ($sanitize && !$debug && !$pretty) {
        $this->_sanitizeCSS($dom->getElementsByTagName('style'));
    }

    if ($stripcomments && !$debug) {
        foreach ($this->_domComments($dom) as $node) {
            /* IE conditional comments ("[endif]") are kept. */
            if (preg_match('/\[endif\]/i', $node->nodeValue)) {
                continue;
            }
            /* Comments inside <script> are kept: some developers wrap their
             * JavaScript in an HTML comment. */
            if ($node->parentNode === null || $node->parentNode->nodeName == 'script') {
                continue;
            }
            $node->parentNode->removeChild($node);
        }
    }

    $mainBody = $dom->getElementsByTagName('body')->item(0);
    $target   = $dom->getElementsByTagName('footer')->item(0);
    if ($target === null) {
        /* No <footer> on the page: fall back to the end of <body>. */
        $target = $mainBody;
    }

    /* Collect first, move afterwards: childNodes is a live list and must not
     * be modified while it is being iterated. */
    $moveme = array();
    foreach ($dom->getElementsByTagName('head') as $head) {
        foreach ($head->childNodes as $node) {
            /* $src is the text the omit patterns are tested against: the src
             * attribute when present, otherwise the inline/comment content. */
            $src = null;
            if ($node instanceof DOMComment) {
                if (preg_match('/<script/i', $node->nodeValue)) {
                    $src = $node->nodeValue;
                }
            } elseif ($node instanceof DOMElement
                && $node->nodeName == 'script'
                && $node->getAttribute('type') === 'text/javascript'
            ) {
                $src = $node->getAttribute('src');
                if ($src === '') {
                    $src = $node->nodeValue;
                }
            }
            if ($src === null) {
                continue;
            }

            /* "exclude" sets the default decision; a matching omit pattern
             * flips it for this node. */
            $move = $exclude;
            foreach ($omit as $omitit) {
                if (preg_match($omitit, $src) == 1) {
                    $move = !$exclude;
                    break;
                }
            }
            if ($move) {
                $moveme[] = $node;
            }
        }
    }

    if ($target !== null) {
        foreach ($moveme as $moveit) {
            /* appendChild() on a node already in the tree moves it. */
            $target->appendChild($moveit);
            if ($newline !== null) {
                $target->appendChild($newline->cloneNode(false));
            }
        }
    }

    /* In-page scripts: every <script> without a src attribute anywhere inside
     * <body> goes to the end of <body>. getElementsByTagName() is used rather
     * than an unprefixed XPath so that namespaced XHTML documents are handled
     * too; the live list is snapshotted before nodes are moved. */
    if ($mainBody !== null) {
        $inline = iterator_to_array($mainBody->getElementsByTagName('script'), false);
        foreach ($inline as $tag) {
            if ($tag->hasAttribute('src')) {
                continue;
            }
            $mainBody->appendChild($tag);
        }
    }

    $output = $xhtml ? $dom->saveXML() : $dom->saveHTML();
    if ($sanitize) {
        $output = preg_replace($this->sanitizews['search'], $this->sanitizews['replace'], $output);
    }
    JResponse::setBody($output);
}

Advertisement

Answer

In order to get ONLY the <script> nodes that don't have the src attribute, you'd better use DOMXPath:

// Select every <script> element below <body>, at any depth, that has no src attribute.
$xpath = new DOMXPath($dom);
$script_tags = $xpath->query('//body//script[not(@src)]');

The variable $script_tags is now a DOMNodeList object that contains all of your script tags. You can now loop over the DOMNodeList to get all the nodes and do whatever you would like to do with them:

// Dump each inline script's text and queue the node in $moveme for later
// processing ($moveme is not initialised in this snippet).
foreach ($script_tags as $tag) {
    var_dump($tag->nodeValue);
    $moveme[] = $tag;
}
User contributions licensed under: CC BY-SA
1 People found this is helpful
Advertisement