scripts.mit.edu Git - autoinstalls/mediawiki.git/blobdiff - maintenance/backupPrefetch.inc
MediaWiki 1.17.0
[autoinstalls/mediawiki.git] / maintenance / backupPrefetch.inc
index 512af1c73ae48a0f0b23c707c2ea1f8ee9222789..9d74313777c811678b4f1e59556a6b3493952db2 100644 (file)
@@ -1,42 +1,10 @@
 <?php
-
-// Some smart guy removed XMLReader's global constants from PHP 5.1
-// and replaced them with class constants. Breaking source compatibility
-// is SUPER awesome, and I love languages which do this constantly!
-$xmlReaderConstants = array(
-       "NONE",
-       "ELEMENT",
-       "ATTRIBUTE", 
-       "TEXT",
-       "CDATA",
-       "ENTITY_REF",
-       "ENTITY",
-       "PI",
-       "COMMENT",
-       "DOC",
-       "DOC_TYPE",
-       "DOC_FRAGMENT",
-       "NOTATION",
-       "WHITESPACE",
-       "SIGNIFICANT_WHITESPACE",
-       "END_ELEMENT",
-       "END_ENTITY",
-       "XML_DECLARATION",
-       "LOADDTD",
-       "DEFAULTATTRS",
-       "VALIDATE",
-       "SUBST_ENTITIES" );
-foreach( $xmlReaderConstants as $name ) {
-       $fullName = "XMLREADER_$name";
-       $newName = "XMLReader::$name";
-       if( !defined( $fullName ) ) {
-               if( defined( $newName ) ) {
-                       define( $fullName, constant( $newName ) );
-               } else {
-                       // broken or missing the extension...
-               }
-       }
-}
+/**
+ * Helper class for the --prefetch option of dumpTextPass.php
+ *
+ * @file
+ * @ingroup Maintenance
+ */
 
 /**
  * Readahead helper for making large MediaWiki data dumps;
@@ -51,7 +19,6 @@ foreach( $xmlReaderConstants as $name ) {
  * - text contents are immutable and should not change once
  *   recorded, so the previous dump is a reliable source
  *
- * Requires PHP 5 and the XMLReader PECL extension.
  * @ingroup Maintenance
  */
 class BaseDump {
@@ -60,9 +27,12 @@ class BaseDump {
        var $atPageEnd = false;
        var $lastPage = 0;
        var $lastRev = 0;
+       var $infiles = null;
 
        function BaseDump( $infile ) {
+               $this->infiles = explode(';',$infile);
                $this->reader = new XMLReader();
+               $infile = array_shift($this->infiles);
                $this->reader->open( $infile );
        }
 
@@ -71,26 +41,26 @@ class BaseDump {
         * from the dump stream. May return null if the page is
         * unavailable.
         *
-        * @param int $page ID number of page to read
-        * @param int $rev ID number of revision to read
+        * @param $page Integer: ID number of page to read
+        * @param $rev Integer: ID number of revision to read
         * @return string or null
         */
        function prefetch( $page, $rev ) {
                $page = intval( $page );
                $rev = intval( $rev );
-               while( $this->lastPage < $page && !$this->atEnd ) {
+               while ( $this->lastPage < $page && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
                        $this->nextPage();
                }
-               if( $this->lastPage > $page || $this->atEnd ) {
+               if ( $this->lastPage > $page || $this->atEnd ) {
                        $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
                        return null;
                }
-               while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
+               while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
                        $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
                        $this->nextRev();
                }
-               if( $this->lastRev == $rev && !$this->atEnd ) {
+               if ( $this->lastRev == $rev && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
                        return $this->nextText();
                } else {
@@ -101,22 +71,27 @@ class BaseDump {
 
        function debug( $str ) {
                wfDebug( $str . "\n" );
-               //global $dumper;
-               //$dumper->progress( $str );
+               // global $dumper;
+               // $dumper->progress( $str );
        }
 
        /**
         * @access private
         */
        function nextPage() {
-               if( $this->skipTo( 'page', 'mediawiki' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'page', 'mediawiki' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastPage = intval( $this->nodeContents() );
                                $this->lastRev = 0;
                                $this->atPageEnd = false;
                        }
                } else {
-                       $this->atEnd = true;
+                       $this->close();
+                       if (count($this->infiles)) {
+                               $infile = array_shift($this->infiles);
+                               $this->reader->open( $infile );
+                               $this->atEnd = false;
+                       }
                }
        }
 
@@ -124,8 +99,8 @@ class BaseDump {
         * @access private
         */
        function nextRev() {
-               if( $this->skipTo( 'revision' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'revision' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastRev = intval( $this->nodeContents() );
                        }
                } else {
@@ -144,16 +119,16 @@ class BaseDump {
        /**
         * @access private
         */
-       function skipTo( $name, $parent='page' ) {
-               if( $this->atEnd ) {
+       function skipTo( $name, $parent = 'page' ) {
+               if ( $this->atEnd ) {
                        return false;
                }
-               while( $this->reader->read() ) {
-                       if( $this->reader->nodeType == XMLREADER_ELEMENT &&
+               while ( $this->reader->read() ) {
+                       if ( $this->reader->nodeType == XMLReader::ELEMENT &&
                                $this->reader->name == $name ) {
                                return true;
                        }
-                       if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
+                       if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                                $this->reader->name == $parent ) {
                                $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
                                return false;
@@ -166,25 +141,26 @@ class BaseDump {
         * Shouldn't something like this be built-in to XMLReader?
         * Fetches text contents of the current element, assuming
         * no sub-elements or such scary things.
-        * @return string
+        *
+        * @return String
         * @access private
         */
        function nodeContents() {
-               if( $this->atEnd ) {
+               if ( $this->atEnd ) {
                        return null;
                }
-               if( $this->reader->isEmptyElement ) {
+               if ( $this->reader->isEmptyElement ) {
                        return "";
                }
                $buffer = "";
-               while( $this->reader->read() ) {
+               while ( $this->reader->read() ) {
                        switch( $this->reader->nodeType ) {
-                       case XMLREADER_TEXT:
-//                     case XMLREADER_WHITESPACE:
-                       case XMLREADER_SIGNIFICANT_WHITESPACE:
+                       case XMLReader::TEXT:
+//                     case XMLReader::WHITESPACE:
+                       case XMLReader::SIGNIFICANT_WHITESPACE:
                                $buffer .= $this->reader->value;
                                break;
-                       case XMLREADER_END_ELEMENT:
+                       case XMLReader::END_ELEMENT:
                                return $buffer;
                        }
                }