]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blobdiff - maintenance/backupPrefetch.inc
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / maintenance / backupPrefetch.inc
index 512af1c73ae48a0f0b23c707c2ea1f8ee9222789..6a2d3bf626565264dec0d1236dedb8673e1f1068 100644 (file)
@@ -1,42 +1,28 @@
 <?php
-
-// Some smart guy removed XMLReader's global constants from PHP 5.1
-// and replaced them with class constants. Breaking source compatibility
-// is SUPER awesome, and I love languages which do this constantly!
-$xmlReaderConstants = array(
-       "NONE",
-       "ELEMENT",
-       "ATTRIBUTE", 
-       "TEXT",
-       "CDATA",
-       "ENTITY_REF",
-       "ENTITY",
-       "PI",
-       "COMMENT",
-       "DOC",
-       "DOC_TYPE",
-       "DOC_FRAGMENT",
-       "NOTATION",
-       "WHITESPACE",
-       "SIGNIFICANT_WHITESPACE",
-       "END_ELEMENT",
-       "END_ENTITY",
-       "XML_DECLARATION",
-       "LOADDTD",
-       "DEFAULTATTRS",
-       "VALIDATE",
-       "SUBST_ENTITIES" );
-foreach( $xmlReaderConstants as $name ) {
-       $fullName = "XMLREADER_$name";
-       $newName = "XMLReader::$name";
-       if( !defined( $fullName ) ) {
-               if( defined( $newName ) ) {
-                       define( $fullName, constant( $newName ) );
-               } else {
-                       // broken or missing the extension...
-               }
-       }
-}
+/**
+ * Helper class for the --prefetch option of dumpTextPass.php
+ *
+ * Copyright © 2005 Brion Vibber <brion@pobox.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
 
 /**
  * Readahead helper for making large MediaWiki data dumps;
@@ -51,19 +37,26 @@ foreach( $xmlReaderConstants as $name ) {
  * - text contents are immutable and should not change once
  *   recorded, so the previous dump is a reliable source
  *
- * Requires PHP 5 and the XMLReader PECL extension.
  * @ingroup Maintenance
  */
 class BaseDump {
-       var $reader = null;
-       var $atEnd = false;
-       var $atPageEnd = false;
-       var $lastPage = 0;
-       var $lastRev = 0;
+       /** @var XMLReader */
+       protected $reader = null;
+       protected $atEnd = false;
+       protected $atPageEnd = false;
+       protected $lastPage = 0;
+       protected $lastRev = 0;
+       protected $infiles = null;
 
-       function BaseDump( $infile ) {
+       public function __construct( $infile ) {
+               $this->infiles = explode( ';', $infile );
                $this->reader = new XMLReader();
-               $this->reader->open( $infile );
+               $infile = array_shift( $this->infiles );
+               if ( defined( 'LIBXML_PARSEHUGE' ) ) {
+                       $this->reader->open( $infile, null, LIBXML_PARSEHUGE );
+               } else {
+                       $this->reader->open( $infile );
+               }
        }
 
        /**
@@ -73,50 +66,61 @@ class BaseDump {
         *
         * @param int $page ID number of page to read
         * @param int $rev ID number of revision to read
-        * @return string or null
+        * @return string|null
         */
        function prefetch( $page, $rev ) {
                $page = intval( $page );
                $rev = intval( $rev );
-               while( $this->lastPage < $page && !$this->atEnd ) {
+               while ( $this->lastPage < $page && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
                        $this->nextPage();
                }
-               if( $this->lastPage > $page || $this->atEnd ) {
-                       $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
+               if ( $this->lastPage > $page || $this->atEnd ) {
+                       $this->debug( "BaseDump::prefetch already past page $page "
+                               . "looking for rev $rev  [$this->lastPage, $this->lastRev]" );
+
                        return null;
                }
-               while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
-                       $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
+               while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
+                       $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
+                               . "looking for $page, $rev" );
                        $this->nextRev();
                }
-               if( $this->lastRev == $rev && !$this->atEnd ) {
+               if ( $this->lastRev == $rev && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
+
                        return $this->nextText();
                } else {
-                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page  [$this->lastPage, $this->lastRev]" );
+                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page "
+                               . "[$this->lastPage, $this->lastRev]" );
+
                        return null;
                }
        }
 
        function debug( $str ) {
                wfDebug( $str . "\n" );
-               //global $dumper;
-               //$dumper->progress( $str );
+               // global $dumper;
+               // $dumper->progress( $str );
        }
 
        /**
         * @access private
         */
        function nextPage() {
-               if( $this->skipTo( 'page', 'mediawiki' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'page', 'mediawiki' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastPage = intval( $this->nodeContents() );
                                $this->lastRev = 0;
                                $this->atPageEnd = false;
                        }
                } else {
-                       $this->atEnd = true;
+                       $this->close();
+                       if ( count( $this->infiles ) ) {
+                               $infile = array_shift( $this->infiles );
+                               $this->reader->open( $infile );
+                               $this->atEnd = false;
+                       }
                }
        }
 
@@ -124,8 +128,8 @@ class BaseDump {
         * @access private
         */
        function nextRev() {
-               if( $this->skipTo( 'revision' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'revision' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastRev = intval( $this->nodeContents() );
                        }
                } else {
@@ -135,30 +139,39 @@ class BaseDump {
 
        /**
         * @access private
+        * @return string
         */
        function nextText() {
                $this->skipTo( 'text' );
+
                return strval( $this->nodeContents() );
        }
 
        /**
         * @access private
+        * @param string $name
+        * @param string $parent
+        * @return bool|null
         */
-       function skipTo( $name, $parent='page' ) {
-               if( $this->atEnd ) {
+       function skipTo( $name, $parent = 'page' ) {
+               if ( $this->atEnd ) {
                        return false;
                }
-               while( $this->reader->read() ) {
-                       if( $this->reader->nodeType == XMLREADER_ELEMENT &&
-                               $this->reader->name == $name ) {
+               while ( $this->reader->read() ) {
+                       if ( $this->reader->nodeType == XMLReader::ELEMENT
+                               && $this->reader->name == $name
+                       ) {
                                return true;
                        }
-                       if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
-                               $this->reader->name == $parent ) {
+                       if ( $this->reader->nodeType == XMLReader::END_ELEMENT
+                               && $this->reader->name == $parent
+                       ) {
                                $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
+
                                return false;
                        }
                }
+
                return $this->close();
        }
 
@@ -166,37 +179,41 @@ class BaseDump {
         * Shouldn't something like this be built-in to XMLReader?
         * Fetches text contents of the current element, assuming
         * no sub-elements or such scary things.
+        *
         * @return string
         * @access private
         */
        function nodeContents() {
-               if( $this->atEnd ) {
+               if ( $this->atEnd ) {
                        return null;
                }
-               if( $this->reader->isEmptyElement ) {
+               if ( $this->reader->isEmptyElement ) {
                        return "";
                }
                $buffer = "";
-               while( $this->reader->read() ) {
-                       switch( $this->reader->nodeType ) {
-                       case XMLREADER_TEXT:
-//                     case XMLREADER_WHITESPACE:
-                       case XMLREADER_SIGNIFICANT_WHITESPACE:
-                               $buffer .= $this->reader->value;
-                               break;
-                       case XMLREADER_END_ELEMENT:
-                               return $buffer;
+               while ( $this->reader->read() ) {
+                       switch ( $this->reader->nodeType ) {
+                               case XMLReader::TEXT:
+                               // case XMLReader::WHITESPACE:
+                               case XMLReader::SIGNIFICANT_WHITESPACE:
+                                       $buffer .= $this->reader->value;
+                                       break;
+                               case XMLReader::END_ELEMENT:
+                                       return $buffer;
                        }
                }
+
                return $this->close();
        }
 
        /**
         * @access private
+        * @return null
         */
        function close() {
                $this->reader->close();
                $this->atEnd = true;
+
                return null;
        }
 }