scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blobdiff - includes/parser/Preprocessor_DOM.php
MediaWiki 1.17.0
[autoinstallsdev/mediawiki.git] / includes / parser / Preprocessor_DOM.php
index af591b67d2fceaa9f7ed408f325738b5fc0b09fe..2b635f7c0d251e1d060d6ea1de5a75b5528f1096 100644 (file)
@@ -1,11 +1,19 @@
 <?php
-
+/**
+ * Preprocessor using PHP's dom extension
+ *
+ * @file
+ * @ingroup Parser
+ */
 /**
  * @ingroup Parser
  */
 class Preprocessor_DOM implements Preprocessor {
        var $parser, $memoryLimit;
 
+       const CACHE_VERSION = 1;
+
        function __construct( $parser ) {
                $this->parser = $parser;
                $mem = ini_get( 'memory_limit' );
@@ -27,6 +35,30 @@ class Preprocessor_DOM implements Preprocessor {
                return new PPCustomFrame_DOM( $this, $args );
        }
 
+       function newPartNodeArray( $values ) {
+               //NOTE: DOM manipulation is slower than building & parsing XML! (or so Tim says)
+               $xml = "";
+               $xml .= "<list>";
+
+               foreach ( $values as $k => $val ) {
+                        
+                       if ( is_int( $k ) ) {
+                               $xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) ."</value></part>";
+                       } else {
+                               $xml .= "<part><name>" . htmlspecialchars( $k ) . "</name>=<value>" . htmlspecialchars( $val ) . "</value></part>";
+                       }
+               }
+
+               $xml .= "</list>";
+
+               $dom = new DOMDocument();
+               $dom->loadXML( $xml );
+               $root = $dom->documentElement;
+
+               $node = new PPNode_DOM( $root->childNodes );
+               return $node;
+       }
+
        function memCheck() {
                if ( $this->memoryLimit === false ) {
                        return;
@@ -43,8 +75,8 @@ class Preprocessor_DOM implements Preprocessor {
         * Preprocess some wikitext and return the document tree.
         * This is the ghost of Parser::replace_variables().
         *
-        * @param string $text The text to parse
-        * @param integer flags Bitwise combination of:
+        * @param $text String: the text to parse
+        * @param $flags Integer: bitwise combination of:
         *          Parser::PTD_FOR_INCLUSION    Handle <noinclude>/<includeonly> as if the text is being
         *                                     included. Default is to assume a direct page view.
         *
@@ -63,8 +95,61 @@ class Preprocessor_DOM implements Preprocessor {
         */
        function preprocessToObj( $text, $flags = 0 ) {
                wfProfileIn( __METHOD__ );
-               wfProfileIn( __METHOD__.'-makexml' );
+               global $wgMemc, $wgPreprocessorCacheThreshold;
+               
+               $xml = false;
+               $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+               if ( $cacheable ) {
+                       wfProfileIn( __METHOD__.'-cacheable' );
+
+                       $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
+                       $cacheValue = $wgMemc->get( $cacheKey );
+                       if ( $cacheValue ) {
+                               $version = substr( $cacheValue, 0, 8 );
+                               if ( intval( $version ) == self::CACHE_VERSION ) {
+                                       $xml = substr( $cacheValue, 8 );
+                                       // From the cache
+                                       wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
+                               }
+                       }
+               }
+               if ( $xml === false ) {
+                       if ( $cacheable ) {
+                               wfProfileIn( __METHOD__.'-cache-miss' );
+                               $xml = $this->preprocessToXml( $text, $flags );
+                               $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
+                               $wgMemc->set( $cacheKey, $cacheValue, 86400 );
+                               wfProfileOut( __METHOD__.'-cache-miss' );
+                               wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
+                       } else {
+                               $xml = $this->preprocessToXml( $text, $flags );
+                       }
 
+               }
+               wfProfileIn( __METHOD__.'-loadXML' );
+               $dom = new DOMDocument;
+               wfSuppressWarnings();
+               $result = $dom->loadXML( $xml );
+               wfRestoreWarnings();
+               if ( !$result ) {
+                       // Try running the XML through UtfNormal to get rid of invalid characters
+                       $xml = UtfNormal::cleanUp( $xml );
+                       $result = $dom->loadXML( $xml );
+                       if ( !$result ) {
+                               throw new MWException( __METHOD__.' generated invalid XML' );
+                       }
+               }
+               $obj = new PPNode_DOM( $dom->documentElement );
+               wfProfileOut( __METHOD__.'-loadXML' );
+               if ( $cacheable ) {
+                       wfProfileOut( __METHOD__.'-cacheable' );
+               }
+               wfProfileOut( __METHOD__ );
+               return $obj;
+       }
+       
+       function preprocessToXml( $text, $flags = 0 ) {
+               wfProfileIn( __METHOD__ );
                $rules = array(
                        '{' => array(
                                'end' => '}',
@@ -304,7 +389,9 @@ class Preprocessor_DOM implements Preprocessor {
                                } else {
                                        $attrEnd = $tagEndPos;
                                        // Find closing tag
-                                       if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
+                                       if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", 
+                                                       $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) 
+                                       {
                                                $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
                                                $i = $matches[0][1] + strlen( $matches[0][0] );
                                                $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
@@ -362,7 +449,8 @@ class Preprocessor_DOM implements Preprocessor {
                                                'count' => $count );
                                        $stack->push( $piece );
                                        $accum =& $stack->getAccum();
-                                       extract( $stack->getFlags() );
+                                       $flags = $stack->getFlags();
+                                       extract( $flags );
                                        $i += $count;
                                }
                        }
@@ -385,7 +473,7 @@ class Preprocessor_DOM implements Preprocessor {
                                $count = $piece->count;
                                $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
                                if ( $equalsLength > 0 ) {
-                                       if ( $i - $equalsLength == $piece->startPos ) {
+                                       if ( $searchStart - $equalsLength == $piece->startPos ) {
                                                // This is just a single string of equals signs on its own line
                                                // Replicate the doHeadings behaviour /={count}(.+)={count}/
                                                // First find out how many equals signs there really are (don't stop at 6)
@@ -413,7 +501,8 @@ class Preprocessor_DOM implements Preprocessor {
                                // Unwind the stack
                                $stack->pop();
                                $accum =& $stack->getAccum();
-                               extract( $stack->getFlags() );
+                               $flags = $stack->getFlags();
+                               extract( $flags );
 
                                // Append the result to the enclosing accumulator
                                $accum .= $element;
@@ -422,9 +511,7 @@ class Preprocessor_DOM implements Preprocessor {
                                // another heading. Infinite loops are avoided because the next iteration MUST
                                // hit the heading open case above, which unconditionally increments the
                                // input pointer.
-                       }
-
-                       elseif ( $found == 'open' ) {
+                       } elseif ( $found == 'open' ) {
                                # count opening brace characters
                                $count = strspn( $text, $curChar, $i );
 
@@ -440,15 +527,14 @@ class Preprocessor_DOM implements Preprocessor {
 
                                        $stack->push( $piece );
                                        $accum =& $stack->getAccum();
-                                       extract( $stack->getFlags() );
+                                       $flags = $stack->getFlags();
+                                       extract( $flags );
                                } else {
                                        # Add literal brace(s)
                                        $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
                                }
                                $i += $count;
-                       }
-
-                       elseif ( $found == 'close' ) {
+                       } elseif ( $found == 'close' ) {
                                $piece = $stack->top;
                                # lets check if there are enough characters for closing brace
                                $maxCount = $piece->count;
@@ -456,7 +542,6 @@ class Preprocessor_DOM implements Preprocessor {
 
                                # check for maximum matching characters (if there are 5 closing
                                # characters, we will probably need only 3 - depending on the rules)
-                               $matchingCount = 0;
                                $rule = $rules[$piece->open];
                                if ( $count > $rule['max'] ) {
                                        # The specified maximum exists in the callback array, unless the caller
@@ -501,7 +586,7 @@ class Preprocessor_DOM implements Preprocessor {
                                        $element = "<$name$attr>";
                                        $element .= "<title>$title</title>";
                                        $argIndex = 1;
-                                       foreach ( $parts as $partIndex => $part ) {
+                                       foreach ( $parts as $part ) {
                                                if ( isset( $part->eqpos ) ) {
                                                        $argName = substr( $part->out, 0, $part->eqpos );
                                                        $argValue = substr( $part->out, $part->eqpos + 1 );
@@ -540,8 +625,8 @@ class Preprocessor_DOM implements Preprocessor {
                                        }
                                        $enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
                                }
-
-                               extract( $stack->getFlags() );
+                               $flags = $stack->getFlags();
+                               extract( $flags );
 
                                # Add XML element to the enclosing accumulator
                                $accum .= $element;
@@ -569,24 +654,9 @@ class Preprocessor_DOM implements Preprocessor {
                $stack->rootAccum .= '</root>';
                $xml = $stack->rootAccum;
 
-               wfProfileOut( __METHOD__.'-makexml' );
-               wfProfileIn( __METHOD__.'-loadXML' );
-               $dom = new DOMDocument;
-               wfSuppressWarnings();
-               $result = $dom->loadXML( $xml );
-               wfRestoreWarnings();
-               if ( !$result ) {
-                       // Try running the XML through UtfNormal to get rid of invalid characters
-                       $xml = UtfNormal::cleanUp( $xml );
-                       $result = $dom->loadXML( $xml );
-                       if ( !$result ) {
-                               throw new MWException( __METHOD__.' generated invalid XML' );
-                       }
-               }
-               $obj = new PPNode_DOM( $dom->documentElement );
-               wfProfileOut( __METHOD__.'-loadXML' );
                wfProfileOut( __METHOD__ );
-               return $obj;
+               
+               return $xml;
        }
 }
 
@@ -777,7 +847,7 @@ class PPFrame_DOM implements PPFrame {
 
        /**
         * Construct a new preprocessor frame.
-        * @param Preprocessor $preprocessor The parent preprocessor
+        * @param $preprocessor Preprocessor: The parent preprocessor
         */
        function __construct( $preprocessor ) {
                $this->preprocessor = $preprocessor;
@@ -831,16 +901,16 @@ class PPFrame_DOM implements PPFrame {
                if ( is_string( $root ) ) {
                        return $root;
                }
-               wfProfileIn( __METHOD__ );
 
-               if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
+               if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() )
                {
                        return '<span class="error">Node-count limit exceeded</span>';
                }
 
-               if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
+               if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
                        return '<span class="error">Expansion depth limit exceeded</span>';
                }
+               wfProfileIn( __METHOD__ );
                ++$expansionDepth;
 
                if ( $root instanceof PPNode_DOM ) {
@@ -887,7 +957,9 @@ class PPFrame_DOM implements PPFrame {
                                $iteratorStack[$level] = false;
                        }
 
-                       if ( $contextNode instanceof PPNode_DOM ) $contextNode = $contextNode->node;
+                       if ( $contextNode instanceof PPNode_DOM ) {
+                               $contextNode = $contextNode->node;
+                       }
 
                        $newIterator = false;
 
@@ -906,7 +978,7 @@ class PPFrame_DOM implements PPFrame {
                                        $titles = $xpath->query( 'title', $contextNode );
                                        $title = $titles->item( 0 );
                                        $parts = $xpath->query( 'part', $contextNode );
-                                       if ( $flags & self::NO_TEMPLATES ) {
+                                       if ( $flags & PPFrame::NO_TEMPLATES ) {
                                                $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
                                        } else {
                                                $lineStart = $contextNode->getAttribute( 'lineStart' );
@@ -927,7 +999,7 @@ class PPFrame_DOM implements PPFrame {
                                        $titles = $xpath->query( 'title', $contextNode );
                                        $title = $titles->item( 0 );
                                        $parts = $xpath->query( 'part', $contextNode );
-                                       if ( $flags & self::NO_ARGS ) {
+                                       if ( $flags & PPFrame::NO_ARGS ) {
                                                $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
                                        } else {
                                                $params = array(
@@ -945,13 +1017,13 @@ class PPFrame_DOM implements PPFrame {
                                        # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
                                        if ( $this->parser->ot['html']
                                                || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
-                                               || ( $flags & self::STRIP_COMMENTS ) )
+                                               || ( $flags & PPFrame::STRIP_COMMENTS ) )
                                        {
                                                $out .= '';
                                        }
                                        # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
                                        # Not in RECOVER_COMMENTS mode (extractSections) though
-                                       elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
+                                       elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) {
                                                $out .= $this->parser->insertStripItem( $contextNode->textContent );
                                        }
                                        # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
@@ -963,7 +1035,7 @@ class PPFrame_DOM implements PPFrame {
                                        # OT_WIKI will only respect <ignore> in substed templates.
                                        # The other output types respect it unless NO_IGNORE is set.
                                        # extractSections() sets NO_IGNORE and so never respects it.
-                                       if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
+                                       if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) {
                                                $out .= $contextNode->textContent;
                                        } else {
                                                $out .= '';
@@ -1067,7 +1139,9 @@ class PPFrame_DOM implements PPFrame {
                $first = true;
                $s = '';
                foreach ( $args as $root ) {
-                       if ( $root instanceof PPNode_DOM ) $root = $root->node;
+                       if ( $root instanceof PPNode_DOM ) {
+                               $root = $root->node;
+                       }
                        if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
                                $root = array( $root );
                        }
@@ -1091,9 +1165,11 @@ class PPFrame_DOM implements PPFrame {
                $args = array_slice( func_get_args(), 1 );
                $out = array();
                $first = true;
-               if ( $root instanceof PPNode_DOM ) $root = $root->node;
 
                foreach ( $args as $root ) {
+                       if ( $root instanceof PPNode_DOM ) {
+                               $root = $root->node;
+                       }
                        if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
                                $root = array( $root );
                        }
@@ -1118,7 +1194,9 @@ class PPFrame_DOM implements PPFrame {
                $first = true;
 
                foreach ( $args as $root ) {
-                       if ( $root instanceof PPNode_DOM ) $root = $root->node;
+                       if ( $root instanceof PPNode_DOM ) {
+                               $root = $root->node;
+                       }
                        if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
                                $root = array( $root );
                        }
@@ -1147,6 +1225,18 @@ class PPFrame_DOM implements PPFrame {
                }
        }
 
+       function getArguments() {
+               return array();
+       }
+
+       function getNumberedArguments() {
+               return array();
+       }
+
+       function getNamedArguments() {
+               return array();
+       }
+
        /**
         * Returns true if there are no arguments in this frame
         */
@@ -1182,8 +1272,8 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
        var $numberedExpansionCache, $namedExpansionCache;
 
        function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
-               $this->preprocessor = $preprocessor;
-               $this->parser = $preprocessor->parser;
+               parent::__construct( $preprocessor );
+
                $this->parent = $parent;
                $this->numberedArgs = $numberedArgs;
                $this->namedArgs = $namedArgs;
@@ -1254,7 +1344,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
                }
                if ( !isset( $this->numberedExpansionCache[$index] ) ) {
                        # No trimming for unnamed arguments
-                       $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
+                       $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS );
                }
                return $this->numberedExpansionCache[$index];
        }
@@ -1266,7 +1356,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
                if ( !isset( $this->namedExpansionCache[$name] ) ) {
                        # Trim named arguments post-expand, for backwards compatibility
                        $this->namedExpansionCache[$name] = trim(
-                               $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
+                               $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) );
                }
                return $this->namedExpansionCache[$name];
        }
@@ -1295,8 +1385,7 @@ class PPCustomFrame_DOM extends PPFrame_DOM {
        var $args;
 
        function __construct( $preprocessor, $args ) {
-               $this->preprocessor = $preprocessor;
-               $this->parser = $preprocessor->parser;
+               parent::__construct( $preprocessor );
                $this->args = $args;
        }