]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/Export.php
MediaWiki 1.14.0
[autoinstallsdev/mediawiki.git] / includes / Export.php
1 <?php
2 # Copyright (C) 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21  * @defgroup Dump Dump
22  */
23
/**
 * Reads page, revision and log rows from the database and feeds them,
 * formatted as XML by an XmlDumpWriter, into a DumpOutput/DumpFilter sink.
 *
 * @ingroup SpecialPage Dump
 */
class WikiExporter {
        var $list_authors = false ; # Return distinct author list (when not returning full history)
        var $author_list = "" ;

        var $dumpUploads = false;

        # Assigned by the constructor (and by setOutputSink() for $sink).
        var $db;
        var $history;
        var $buffer;
        var $writer;
        var $sink;
        var $text;

        const FULL = 0;
        const CURRENT = 1;
        const LOGS = 2;

        const BUFFER = 0;
        const STREAM = 1;

        const TEXT = 0;
        const STUB = 1;

        /**
         * If using WikiExporter::STREAM to stream a large amount of data,
         * provide a database connection which is not managed by
         * LoadBalancer to read from: some history blob types will
         * make additional queries to pull source data while the
         * main query is still running.
         *
         * @param $db Database
         * @param $history Mixed: one of WikiExporter::FULL, WikiExporter::CURRENT
         *                 or WikiExporter::LOGS, or an associative array:
         *                   offset: non-inclusive offset at which to start the query
         *                   limit: maximum number of rows to return
         *                   dir: "asc" or "desc" timestamp order
         * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM
         * @param $text Int: one of WikiExporter::TEXT or WikiExporter::STUB
         */
        function __construct( &$db, $history = WikiExporter::CURRENT,
                        $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
                $this->db =& $db;
                $this->history = $history;
                $this->buffer  = $buffer;
                $this->writer  = new XmlDumpWriter();
                $this->sink    = new DumpOutput();
                $this->text    = $text;
        }

        /**
         * Set the DumpOutput or DumpFilter object which will receive
         * various row objects and XML output for filtering. Filters
         * can be chained or used as callbacks.
         *
         * @param $sink mixed
         */
        public function setOutputSink( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * Sends the writer's stream-opening XML to the sink.
         */
        public function openStream() {
                $output = $this->writer->openStream();
                $this->sink->writeOpenStream( $output );
        }

        /**
         * Sends the writer's stream-closing XML to the sink.
         */
        public function closeStream() {
                $output = $this->writer->closeStream();
                $this->sink->writeCloseStream( $output );
        }

        /**
         * Dumps a series of page and revision records for all pages
         * in the database, either including complete history or only
         * the most recent version.
         */
        public function allPages() {
                return $this->dumpFrom( '' );
        }

        /**
         * Dumps a series of page and revision records for those pages
         * in the database falling within the page_id range given.
         * @param $start Int: inclusive lower limit (this id is included)
         * @param $end   Int: Exclusive upper limit (this id is not included)
         *                   If 0, no upper limit.
         */
        public function pagesByRange( $start, $end ) {
                $condition = 'page_id >= ' . intval( $start );
                if( $end ) {
                        $condition .= ' AND page_id < ' . intval( $end );
                }
                return $this->dumpFrom( $condition );
        }

        /**
         * Dumps the single page identified by the given title.
         * @param $title Title
         */
        public function pageByTitle( $title ) {
                return $this->dumpFrom(
                        'page_namespace=' . $title->getNamespace() .
                        ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
        }

        /**
         * Dumps the single page with the given name.
         * @param $name String: page name, parsed with Title::newFromText()
         * @return mixed a WikiError when the name is not a valid title,
         *               otherwise whatever pageByTitle() returns
         */
        public function pageByName( $name ) {
                $title = Title::newFromText( $name );
                if( is_null( $title ) ) {
                        return new WikiError( "Can't export invalid title" );
                } else {
                        return $this->pageByTitle( $title );
                }
        }

        /**
         * Dumps each named page in turn; per-page results (including
         * WikiError for invalid names) are not reported back.
         * @param $names Array of page names
         */
        public function pagesByName( $names ) {
                foreach( $names as $name ) {
                        $this->pageByName( $name );
                }
        }

        /**
         * Dumps with no extra condition; produces log items when the
         * exporter was constructed with WikiExporter::LOGS.
         */
        public function allLogs() {
                return $this->dumpFrom( '' );
        }

        /**
         * Dumps with a log_id range condition.
         * @param $start Int: inclusive lower limit
         * @param $end   Int: exclusive upper limit; if 0, no upper limit
         */
        public function logsByRange( $start, $end ) {
                $condition = 'log_id >= ' . intval( $start );
                if( $end ) {
                        $condition .= ' AND log_id < ' . intval( $end );
                }
                return $this->dumpFrom( $condition );
        }

        # Generates the distinct list of authors of an article
        # Not called by default (depends on $this->list_authors)
        # Can be set by Special:Export when not exporting whole history
        #
        # The result is stored in $this->author_list as a <contributors>
        # XML fragment; revisions whose user is hidden are left out.
        protected function do_list_authors( $page , $revision , $cond ) {
                $fname = "do_list_authors" ;
                wfProfileIn( $fname );
                $this->author_list = "<contributors>";
                //rev_deleted
                $nothidden = '(rev_deleted & '.Revision::DELETED_USER.') = 0';

                $sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} " .
                        "WHERE page_id=rev_page AND $nothidden AND " . $cond ;
                $result = $this->db->query( $sql, $fname );
                $resultset = $this->db->resultObject( $result );
                while( $row = $resultset->fetchObject() ) {
                        # htmlspecialchars(), not htmlentities(): this fragment is XML,
                        # where HTML named entities such as &eacute; are undefined.
                        $this->author_list .= "<contributor>" .
                                "<username>" .
                                htmlspecialchars( $row->rev_user_text ) .
                                "</username>" .
                                "<id>" .
                                intval( $row->rev_user ) .
                                "</id>" .
                                "</contributor>";
                }
                wfProfileOut( $fname );
                $this->author_list .= "</contributors>";
        }

        /**
         * Runs the dump query for the configured history mode and streams
         * the rows to the sink.
         *
         * @param $cond String: SQL condition restricting the dump, or '' for all
         * @return mixed a WikiError when $this->history is not a recognised
         *               mode; nothing otherwise
         */
        protected function dumpFrom( $cond = '' ) {
                $fname = 'WikiExporter::dumpFrom';
                wfProfileIn( $fname );

                # For logs dumps...
                # An array-valued history spec is never a log dump; test that first
                # so an array is never used as a bitwise operand.
                if( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
                        $where = array( 'user_id = log_user' );
                        # Hide private logs
                        $where[] = LogEventsList::getExcludeClause( $this->db );
                        if( $cond ) $where[] = $cond;
                        $result = $this->db->select( array('logging','user'),
                                '*',
                                $where,
                                $fname,
                                array( 'ORDER BY' => 'log_id', 'USE INDEX' => array('logging' => 'PRIMARY') )
                        );
                        $wrapper = $this->db->resultObject( $result );
                        $this->outputLogStream( $wrapper );
                # For page dumps...
                } else {
                        list($page,$revision,$text) = $this->db->tableNamesN('page','revision','text');

                        $order = 'ORDER BY page_id';
                        $limit = '';

                        if( $this->history == WikiExporter::FULL ) {
                                $join = 'page_id=rev_page';
                        } elseif( $this->history == WikiExporter::CURRENT ) {
                                if ( $this->list_authors && $cond != '' )  { // List authors, if so desired
                                        $this->do_list_authors ( $page , $revision , $cond );
                                }
                                $join = 'page_id=rev_page AND page_latest=rev_id';
                        } elseif ( is_array( $this->history ) ) {
                                $join = 'page_id=rev_page';
                                # A missing 'dir' falls through to descending order.
                                if ( isset( $this->history['dir'] ) && $this->history['dir'] == 'asc' ) {
                                        $op = '>';
                                        $order .= ', rev_timestamp';
                                } else {
                                        $op = '<';
                                        $order .= ', rev_timestamp DESC';
                                }
                                if ( !empty( $this->history['offset'] ) ) {
                                        $join .= " AND rev_timestamp $op " . $this->db->addQuotes(
                                                $this->db->timestamp( $this->history['offset'] ) );
                                }
                                if ( !empty( $this->history['limit'] ) ) {
                                        $limitNum = intval( $this->history['limit'] );
                                        if ( $limitNum > 0 ) {
                                                $limit = "LIMIT $limitNum";
                                        }
                                }
                        } else {
                                wfProfileOut( $fname );
                                return new WikiError( "$fname given invalid history dump type." );
                        }
                        $where = ( $cond == '' ) ? '' : "$cond AND";

                        $streaming = ( $this->buffer == WikiExporter::STREAM );
                        $prev = null;
                        if( $streaming ) {
                                # Remember the previous buffering mode so it can be restored below.
                                $prev = $this->db->bufferResults( false );
                        }
                        if( $cond == '' ) {
                                // Optimization hack for full-database dump
                                $revindex = $pageindex = $this->db->useIndexClause("PRIMARY");
                                $straight = ' /*! STRAIGHT_JOIN */ ';
                        } else {
                                $pageindex = '';
                                $revindex = '';
                                $straight = '';
                        }
                        if( $this->text == WikiExporter::STUB ) {
                                $sql = "SELECT $straight * FROM
                                        $page $pageindex,
                                        $revision $revindex
                                        WHERE $where $join
                                        $order $limit";
                        } else {
                                $sql = "SELECT $straight * FROM
                                        $page $pageindex,
                                        $revision $revindex,
                                        $text
                                        WHERE $where $join AND rev_text_id=old_id
                                        $order $limit";
                        }
                        $result = $this->db->query( $sql, $fname );
                        $wrapper = $this->db->resultObject( $result );
                        # One pass only: outputPageStream() consumes and frees the result
                        # set, and already appends $this->author_list to the final page.
                        $this->outputPageStream( $wrapper );

                        if( $streaming ) {
                                $this->db->bufferResults( $prev );
                        }
                }
                wfProfileOut( $fname );
        }

        /**
         * Runs through a query result set dumping page and revision records.
         * The result set should be sorted/grouped by page to avoid duplicate
         * page records in the output.
         *
         * The result set will be freed once complete. Should be safe for
         * streaming (non-buffered) queries, as long as it was made on a
         * separate database connection not managed by LoadBalancer; some
         * blob storage types will make queries to pull source data.
         *
         * $this->author_list is appended only when closing the last page
         * of the result set.
         *
         * @param $resultset ResultWrapper
         */
        protected function outputPageStream( $resultset ) {
                $last = null;
                while( $row = $resultset->fetchObject() ) {
                        # A change of namespace or title marks the start of a new page.
                        if( is_null( $last ) ||
                                $last->page_namespace != $row->page_namespace ||
                                $last->page_title     != $row->page_title ) {
                                if( isset( $last ) ) {
                                        $output = '';
                                        if( $this->dumpUploads ) {
                                                $output .= $this->writer->writeUploads( $last );
                                        }
                                        $output .= $this->writer->closePage();
                                        $this->sink->writeClosePage( $output );
                                }
                                $output = $this->writer->openPage( $row );
                                $this->sink->writeOpenPage( $row, $output );
                                $last = $row;
                        }
                        $output = $this->writer->writeRevision( $row );
                        $this->sink->writeRevision( $row, $output );
                }
                if( isset( $last ) ) {
                        $output = '';
                        if( $this->dumpUploads ) {
                                $output .= $this->writer->writeUploads( $last );
                        }
                        $output .= $this->author_list;
                        $output .= $this->writer->closePage();
                        $this->sink->writeClosePage( $output );
                }
                $resultset->free();
        }

        /**
         * Writes one log item per row of the result set to the sink, then
         * frees the result set.
         *
         * @param $resultset ResultWrapper
         */
        protected function outputLogStream( $resultset ) {
                while( $row = $resultset->fetchObject() ) {
                        $output = $this->writer->writeLogItem( $row );
                        $this->sink->writeLogItem( $row, $output );
                }
                $resultset->free();
        }
}
328
/**
 * Formats database rows as fragments of the MediaWiki XML export format.
 * Every method returns a string; nothing is written out here.
 *
 * @ingroup Dump
 */
class XmlDumpWriter {

        /**
         * Returns the export schema version.
         * @return string
         */
        function schemaVersion() {
                // FIXME: upgrade to 0.4 when updated XSD is ready, for the revision deletion bits
                $version = "0.3";
                return $version;
        }

        /**
         * Builds the opening root <mediawiki> tag followed by the
         * <siteinfo> block. No xml directive is emitted, so the result
         * can be embedded as a subelement of a larger XML stream.
         * Namespace and XML Schema references are included.
         *
         * Output will be encoded in UTF-8.
         *
         * @return string
         */
        function openStream() {
                global $wgContLanguageCode;

                $schema = $this->schemaVersion();
                $namespaceUri = "http://www.mediawiki.org/xml/export-$schema/";
                $rootAttribs = array(
                        'xmlns'              => $namespaceUri,
                        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
                        'xsi:schemaLocation' => $namespaceUri . " " .
                                                "http://www.mediawiki.org/xml/export-$schema.xsd",
                        'version'            => $schema,
                        'xml:lang'           => $wgContLanguageCode,
                );

                $rootTag = Xml::element( 'mediawiki', $rootAttribs, null );
                return $rootTag . "\n" . $this->siteInfo();
        }

        /**
         * Assembles the <siteinfo> block from its individual pieces.
         * @return string
         */
        function siteInfo() {
                $pieces = array();
                $pieces[] = $this->sitename();
                $pieces[] = $this->homelink();
                $pieces[] = $this->generator();
                $pieces[] = $this->caseSetting();
                $pieces[] = $this->namespaces();

                $body = implode( "\n    ", $pieces );
                return "  <siteinfo>\n    " . $body . "\n  </siteinfo>\n";
        }

        /**
         * @return string <sitename> element built from $wgSitename
         */
        function sitename() {
                global $wgSitename;
                $noAttribs = array();
                return Xml::element( 'sitename', $noAttribs, $wgSitename );
        }

        /**
         * @return string <generator> element naming the MediaWiki version
         */
        function generator() {
                global $wgVersion;
                $label = 'MediaWiki ' . $wgVersion;
                return Xml::element( 'generator', array(), $label );
        }

        /**
         * @return string <base> element holding the main page's full URL
         */
        function homelink() {
                $mainPage = Title::newMainPage();
                return Xml::element( 'base', array(), $mainPage->getFullUrl() );
        }

        /**
         * @return string <case> element derived from $wgCapitalLinks
         */
        function caseSetting() {
                global $wgCapitalLinks;
                // "case-insensitive" option is reserved for future
                if( $wgCapitalLinks ) {
                        $mode = 'first-letter';
                } else {
                        $mode = 'case-sensitive';
                }
                return Xml::element( 'case', array(), $mode );
        }

        /**
         * @return string <namespaces> block with one <namespace> per
         *                formatted namespace of the content language
         */
        function namespaces() {
                global $wgContLang;
                $xml = "  <namespaces>\n";
                foreach( $wgContLang->getFormattedNamespaces() as $index => $label ) {
                        $entry = Xml::element( 'namespace', array( 'key' => $index ), $label );
                        $xml .= '      ' . $entry . "\n";
                }
                return $xml . "    </namespaces>";
        }

        /**
         * Returns the closing root element.
         * Call when finished dumping things.
         * @return string
         */
        function closeStream() {
                $closing = "</mediawiki>\n";
                return $closing;
        }


        /**
         * Starts a <page> section with title, id and (when non-empty)
         * restrictions taken from the given database row.
         *
         * @param $row object
         * @return string
         * @access private
         */
        function openPage( $row ) {
                $pad = '    ';
                $pageTitle = Title::makeTitle( $row->page_namespace, $row->page_title );

                $xml  = "  <page>\n";
                $xml .= $pad . Xml::elementClean( 'title', array(), $pageTitle->getPrefixedText() ) . "\n";
                $xml .= $pad . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
                if( $row->page_restrictions != '' ) {
                        $restrictions = strval( $row->page_restrictions );
                        $xml .= $pad . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
                }
                return $xml;
        }

        /**
         * Ends a <page> section.
         *
         * @return string
         * @access private
         */
        function closePage() {
                $closing = "  </page>\n";
                return $closing;
        }

        /**
         * Builds a <revision> section from the given database row.
         * Fields flagged in rev_deleted are emitted as empty elements
         * carrying deleted="deleted".
         *
         * @param $row object
         * @return string
         * @access private
         */
        function writeRevision( $row ) {
                $fname = 'WikiExporter::dumpRev';
                wfProfileIn( $fname );

                $pad = "      ";
                $hidden = array( 'deleted' => 'deleted' );

                $xml  = "    <revision>\n";
                $xml .= $pad . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
                $xml .= $this->writeTimestamp( $row->rev_timestamp );

                if( !( $row->rev_deleted & Revision::DELETED_USER ) ) {
                        $xml .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
                } else {
                        $xml .= $pad . Xml::element( 'contributor', $hidden ) . "\n";
                }

                if( $row->rev_minor_edit ) {
                        $xml .= $pad . "<minor/>\n";
                }

                if( $row->rev_deleted & Revision::DELETED_COMMENT ) {
                        $xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
                } elseif( $row->rev_comment != '' ) {
                        $comment = strval( $row->rev_comment );
                        $xml .= $pad . Xml::elementClean( 'comment', null, $comment ) . "\n";
                }

                if( $row->rev_deleted & Revision::DELETED_TEXT ) {
                        $textElement = Xml::element( 'text', $hidden );
                } elseif( isset( $row->old_text ) ) {
                        // Raw text from the database may have invalid chars
                        $body = strval( Revision::getRevisionText( $row ) );
                        $textElement = Xml::elementClean( 'text',
                                array( 'xml:space' => 'preserve' ),
                                $body );
                } else {
                        // Stub output
                        $textElement = Xml::element( 'text',
                                array( 'id' => $row->rev_text_id ),
                                "" );
                }
                $xml .= $pad . $textElement . "\n";

                $xml .= "    </revision>\n";

                wfProfileOut( $fname );
                return $xml;
        }

        /**
         * Builds a <logitem> section from the given database row.
         * Fields flagged in log_deleted are emitted as empty elements
         * carrying deleted="deleted".
         *
         * @param $row object
         * @return string
         * @access private
         */
        function writeLogItem( $row ) {
                $fname = 'WikiExporter::writeLogItem';
                wfProfileIn( $fname );

                $pad = "      ";
                $hidden = array( 'deleted' => 'deleted' );

                $xml  = "    <logitem>\n";
                $xml .= $pad . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
                $xml .= $this->writeTimestamp( $row->log_timestamp );

                if( !( $row->log_deleted & LogPage::DELETED_USER ) ) {
                        $xml .= $this->writeContributor( $row->log_user, $row->user_name );
                } else {
                        $xml .= $pad . Xml::element( 'contributor', $hidden ) . "\n";
                }

                if( $row->log_deleted & LogPage::DELETED_COMMENT ) {
                        $xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
                } elseif( $row->log_comment != '' ) {
                        $comment = strval( $row->log_comment );
                        $xml .= $pad . Xml::elementClean( 'comment', null, $comment ) . "\n";
                }

                $xml .= $pad . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
                $xml .= $pad . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

                if( $row->log_deleted & LogPage::DELETED_ACTION ) {
                        $xml .= $pad . Xml::element( 'text', $hidden ) . "\n";
                } else {
                        $target = Title::makeTitle( $row->log_namespace, $row->log_title );
                        $xml .= $pad . Xml::elementClean( 'logtitle', null, $target->getPrefixedText() ) . "\n";
                        $params = strval( $row->log_params );
                        $xml .= $pad . Xml::elementClean( 'params',
                                array( 'xml:space' => 'preserve' ),
                                $params ) . "\n";
                }

                $xml .= "    </logitem>\n";

                wfProfileOut( $fname );
                return $xml;
        }

        /**
         * @param $timestamp mixed timestamp accepted by wfTimestamp()
         * @return string <timestamp> element in ISO 8601 form
         */
        function writeTimestamp( $timestamp ) {
                $iso = wfTimestamp( TS_ISO_8601, $timestamp );
                $element = Xml::element( 'timestamp', null, $iso );
                return "      " . $element . "\n";
        }

        /**
         * Builds a <contributor> section: username plus id when $id is
         * truthy, otherwise the text is emitted as an <ip>.
         *
         * @param $id mixed user id
         * @param $text string user name or IP text
         * @return string
         */
        function writeContributor( $id, $text ) {
                $pad = "        ";
                $xml = "      <contributor>\n";
                if( !$id ) {
                        $xml .= $pad . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
                } else {
                        $xml .= $pad . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
                        $xml .= $pad . Xml::element( 'id', null, strval( $id ) ) . "\n";
                }
                return $xml . "      </contributor>\n";
        }

        /**
         * Builds <upload> sections for every version of the file behind
         * an image-namespace page, old versions first; returns '' for
         * other namespaces or when no file is found.
         *
         * Warning! This data is potentially inconsistent. :(
         *
         * @param $row object page row
         * @return string
         */
        function writeUploads( $row ) {
                if( $row->page_namespace != NS_IMAGE ) {
                        return '';
                }
                $file = wfFindFile( $row->page_title );
                if( !$file ) {
                        return '';
                }
                $xml = '';
                foreach( array_reverse( $file->getHistory() ) as $oldVersion ) {
                        $xml .= $this->writeUpload( $oldVersion );
                }
                $xml .= $this->writeUpload( $file );
                return $xml;
        }

        /**
         * Builds a single <upload> section for one file version.
         *
         * @param $file object file version to describe
         * @return string
         */
        function writeUpload( $file ) {
                $pad = "      ";
                $xml  = "    <upload>\n";
                $xml .= $this->writeTimestamp( $file->getTimestamp() );
                $xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
                $xml .= $pad . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n";
                $xml .= $pad . Xml::element( 'filename', null, $file->getName() ) . "\n";
                $xml .= $pad . Xml::element( 'src', null, $file->getFullUrl() ) . "\n";
                $xml .= $pad . Xml::element( 'size', null, $file->getSize() ) . "\n";
                $xml .= "    </upload>\n";
                return $xml;
        }

}
596
597
/**
 * Base class for output stream; prints to stdout or buffer or wherever.
 * Every write* hook simply forwards its XML string to write(), so
 * subclasses only need to override write() to redirect the output.
 * @ingroup Dump
 */
class DumpOutput {
        /** @param $string string XML opening the stream */
        function writeOpenStream( $string ) {
                $this->write( $string );
        }

        /** @param $string string XML closing the stream */
        function writeCloseStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $page object page row (unused here)
         * @param $string string XML opening the page
         */
        function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }

        /** @param $string string XML closing the page */
        function writeClosePage( $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object revision row (unused here)
         * @param $string string XML for the revision
         */
        function writeRevision( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object log row (unused here)
         * @param $string string XML for the log item
         */
        function writeLogItem( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Emits the string on standard output.
         * Override to write to a different stream type.
         * @param $string string
         */
        function write( $string ) {
                echo $string;
        }
}
635
/**
 * Stream outputter to send data to a file.
 * @ingroup Dump
 */
class DumpFileOutput extends DumpOutput {
        var $handle;

        /**
         * Opens the target file for writing and keeps the handle.
         * @param $file string path of the file to write
         */
        function DumpFileOutput( $file ) {
                $mode = "wt";
                $this->handle = fopen( $file, $mode );
        }

        /**
         * Writes the string to the open handle.
         * @param $string string
         */
        function write( $string ) {
                fwrite( $this->handle, $string );
        }
}
651
/**
 * Stream outputter to send data to a file via some filter program.
 * Even if compression is available in a library, using a separate
 * program can allow us to make use of a multi-processor system.
 * @ingroup Dump
 */
class DumpPipeOutput extends DumpFileOutput {
        /**
         * Opens a write pipe to the given command.
         * @param $command string shell command to pipe the dump into
         * @param $file string|null when given, the command's output is
         *              redirected to this (shell-escaped) file
         */
        function DumpPipeOutput( $command, $file = null ) {
                if( $file !== null ) {
                        $redirect = " > " . wfEscapeShellArg( $file );
                        $command = $command . $redirect;
                }
                $this->handle = popen( $command, "w" );
        }
}
666
/**
 * Sends dump output via the gzip compressor.
 * @ingroup Dump
 */
class DumpGZipOutput extends DumpPipeOutput {
        /**
         * @param $file string file receiving the compressed output
         */
        function DumpGZipOutput( $file ) {
                $compressor = "gzip";
                parent::DumpPipeOutput( $compressor, $file );
        }
}
676
/**
 * Sends dump output via the bzip2 compressor.
 * @ingroup Dump
 */
class DumpBZip2Output extends DumpPipeOutput {
        /**
         * @param $file string file receiving the compressed output
         */
        function DumpBZip2Output( $file ) {
                $compressor = "bzip2";
                parent::DumpPipeOutput( $compressor, $file );
        }
}
686
/**
 * Sends dump output via the p7zip compressor.
 * @ingroup Dump
 */
class Dump7ZipOutput extends DumpPipeOutput {
        /**
         * @param $file string archive file that 7za adds the piped data to
         */
        function Dump7ZipOutput( $file ) {
                $archive = wfEscapeShellArg( $file );
                // Suppress annoying useless crap from p7zip
                // Unfortunately this could suppress real error messages too
                $silence = ' >' . wfGetNull() . ' 2>&1';
                parent::DumpPipeOutput( "7za a -bd -si " . $archive . $silence );
        }
}
700
701
702
/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do.
 *
 * Page-level output is forwarded to the wrapped sink only while pass()
 * accepted the page currently open; stream-level output and log items
 * are always forwarded.
 * @ingroup Dump
 */
class DumpFilter {
        # The wrapped DumpOutput or DumpFilter that receives forwarded output.
        var $sink;

        # Whether the page currently open was accepted by pass().
        var $sendingThisPage = false;

        /**
         * @param $sink mixed DumpOutput or DumpFilter to forward to
         */
        function DumpFilter( &$sink ) {
                $this->sink =& $sink;
        }

        /** @param $string string XML opening the stream */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /** @param $string string XML closing the stream */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Asks pass() whether the page is wanted and, if so, forwards it.
         * @param $page object page row
         * @param $string string XML opening the page
         */
        function writeOpenPage( $page, $string ) {
                $this->sendingThisPage = $this->pass( $page, $string );
                if( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * Forwards the page close only if the page open was forwarded.
         * @param $string string XML closing the page
         */
        function writeClosePage( $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }

        /**
         * Forwards the revision only while an accepted page is open.
         * @param $rev object revision row
         * @param $string string XML for the revision
         */
        function writeRevision( $rev, $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forwards a log item unconditionally.
         * @param $rev object log row
         * @param $string string XML for the log item
         */
        function writeLogItem( $rev, $string ) {
                # Must go to the sink's log hook: a chained DumpFilter's
                # writeRevision() would drop it because no page is open.
                $this->sink->writeLogItem( $rev, $string );
        }

        /**
         * Override for page-based filter types.
         * @param $page object page row
         * @return bool
         */
        function pass( $page ) {
                return true;
        }
}
754
/**
 * Dump output filter that drops every page living in a talk namespace.
 * @ingroup Dump
 */
class DumpNotalkFilter extends DumpFilter {
        /**
         * @param $page Object: page row; only page_namespace is read
         * @return bool false when MWNamespace::isTalk() reports a talk
         *              namespace, true otherwise
         */
        function pass( $page ) {
                $isTalk = MWNamespace::isTalk( $page->page_namespace );
                return !$isTalk;
        }
}
764
/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 * @ingroup Dump
 */
class DumpNamespaceFilter extends DumpFilter {
        /** True when the listed namespaces are to be excluded, not included. */
        var $invert = false;

        /** Set of selected namespace numbers, stored as array keys. */
        var $namespaces = array();

        /**
         * @param $sink  Object: next stage in the output chain
         * @param $param String: comma-separated namespace list. Each entry is
         *               either one of the NS_* constant names below or a
         *               numeric namespace id. A leading '!' inverts the
         *               filter so the listed namespaces are excluded.
         * @throws MWException when an entry is neither a known name nor numeric
         */
        function DumpNamespaceFilter( &$sink, $param ) {
                parent::DumpFilter( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // substr() instead of a string offset: it copes with an empty
                // $param without a notice, and avoids the curly-brace offset
                // syntax that newer PHP versions no longer parse.
                if( substr( $param, 0, 1 ) === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                        } elseif( is_numeric( $key ) ) {
                                $ns = intval( $key );
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                        $this->namespaces[$ns] = true;
                }
        }

        /**
         * @param $page Object: page row; only page_namespace is read
         * @return bool true when the page's namespace is in the set, or,
         *              for an inverted filter, when it is not
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
820
821
/**
 * Dump output filter that keeps, for each page, only the revision whose id
 * equals the page's page_latest field.
 *
 * Nothing is sent to the sink while a page is being read; the page header and
 * the matching revision are held back and emitted together on page close.
 * @ingroup Dump
 */
class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;

        /**
         * Remember the page header instead of forwarding it.
         * @param $page Object: page row; page_latest is read later
         * @param $string String
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Emit the held page header, the held revision and this page footer,
         * but only if a matching revision was seen. The held state is
         * cleared in either case.
         * @param $string String
         */
        function writeClosePage( $string ) {
                $latest = $this->rev;
                if( $latest ) {
                        $out = $this->sink;
                        $out->writeOpenPage( $this->page, $this->pageString );
                        $out->writeRevision( $latest, $this->revString );
                        $out->writeClosePage( $string );
                }

                $this->page = null;
                $this->pageString = null;
                $this->rev = null;
                $this->revString = null;
        }

        /**
         * Hold on to the revision when it is the page's latest one; any other
         * revision is discarded.
         * @param $rev Object: revision row; rev_id is read
         * @param $string String
         */
        function writeRevision( $rev, $string ) {
                if( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->rev = $rev;
                $this->revString = $string;
        }
}
853
/**
 * Fan-out writer: repeats every write*() call on each of a list of sinks,
 * in the order the sinks were given.
 * @ingroup Dump
 */
class DumpMultiWriter {
        /** Array of sink objects that receive each call. */
        var $sinks = array();

        /** Number of sinks; retained for code that reads it. */
        var $count = 0;

        /**
         * @param $sinks Array of objects offering the write*() methods below.
         *               The array keys are irrelevant; they need not be
         *               consecutive integers starting at zero.
         */
        function DumpMultiWriter( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * @param $string String
         */
        function writeOpenStream( $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeOpenStream( $string );
                }
        }

        /**
         * @param $string String
         */
        function writeCloseStream( $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeCloseStream( $string );
                }
        }

        /**
         * @param $page Object
         * @param $string String
         */
        function writeOpenPage( $page, $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string String
         */
        function writeClosePage( $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeClosePage( $string );
                }
        }

        /**
         * @param $rev Object
         * @param $string String
         */
        function writeRevision( $rev, $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeRevision( $rev, $string );
                }
        }
}
894
/**
 * Make a string safe to embed in the XML dump.
 *
 * @param $string String: raw text
 * @return String: the text after UtfNormal::cleanUp() and htmlspecialchars()
 */
function xmlsafe( $string ) {
        wfProfileIn( __FUNCTION__ );

        // Old page data may not have been normalized properly. Invalid UTF-8
        // sequences or forbidden control characters would make the XML output
        // invalid, so they are stripped before the markup characters are
        // escaped.
        $escaped = htmlspecialchars( UtfNormal::cleanUp( $string ) );

        wfProfileOut( __FUNCTION__ );
        return $escaped;
}