]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/Export.php
MediaWiki 1.15.0-scripts
[autoinstalls/mediawiki.git] / includes / Export.php
1 <?php
2 # Copyright (C) 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21  * @defgroup Dump Dump
22  */
23
24 /**
25  * @ingroup SpecialPage Dump
26  */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;

	# Assigned by the constructor / setOutputSink(); declared here so the
	# class does not depend on dynamically created properties.
	var $db;
	var $history;
	var $buffer;
	var $writer;
	var $sink;
	var $text;

	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;

	const BUFFER = 0;
	const STREAM = 1;

	const TEXT = 0;
	const STUB = 1;

	/**
	 * If using WikiExporter::STREAM to stream a large amount of data,
	 * provide a database connection which is not managed by
	 * LoadBalancer to read from: some history blob types will
	 * make additional queries to pull source data while the
	 * main query is still running.
	 *
	 * @param $db Database
	 * @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT,
	 *                 or an associative array:
	 *                   offset: non-inclusive offset at which to start the query
	 *                   limit: maximum number of rows to return
	 *                   dir: "asc" or "desc" timestamp order
	 * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM
	 * @param $text Int: one of WikiExporter::TEXT or WikiExporter::STUB;
	 *              with STUB the text table is not joined into page dumps
	 */
	function __construct( &$db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer  = $buffer;
		$this->writer  = new XmlDumpWriter();
		$this->sink    = new DumpOutput();
		$this->text    = $text;
	}

	/**
	 * Set the DumpOutput or DumpFilter object which will receive
	 * various row objects and XML output for filtering. Filters
	 * can be chained or used as callbacks.
	 *
	 * @param $sink mixed
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Sends the writer's stream-opening XML to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Sends the writer's stream-closing XML to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dumps a series of page and revision records for all pages
	 * in the database, either including complete history or only
	 * the most recent version.
	 */
	public function allPages() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dumps a series of page and revision records for those pages
	 * in the database falling within the page_id range given.
	 * @param $start Int: inclusive lower limit (this id is included)
	 * @param $end   Int: Exclusive upper limit (this id is not included)
	 *                   If 0, no upper limit.
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		return $this->dumpFrom( $condition );
	}

	/**
	 * Dumps the single page identified by the given title.
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		return $this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the single page with the given name.
	 * @param $name String: page name, parsed with Title::newFromText()
	 * @return WikiError when the name does not yield a title, otherwise
	 *         whatever pageByTitle() returns
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if( is_null( $title ) ) {
			return new WikiError( "Can't export invalid title" );
		} else {
			return $this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn. The per-page return values are
	 * discarded, so names that fail in pageByName() are skipped silently.
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dumps with no extra condition; produces log items when the
	 * exporter was constructed with WikiExporter::LOGS.
	 */
	public function allLogs() {
		return $this->dumpFrom( '' );
	}

	/**
	 * Dumps log items falling within the log_id range given.
	 * @param $start Int: inclusive lower limit
	 * @param $end   Int: exclusive upper limit; if 0, no upper limit
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		return $this->dumpFrom( $condition );
	}

	# Generates the distinct list of authors of an article
	# Not called by default (depends on $this->list_authors)
	# Can be set by Special:Export when not exporting whole history
	#
	# $page and $revision are the table names to select from and $cond is
	# the caller's page condition. The result is stored in
	# $this->author_list as a <contributors> XML fragment.
	protected function do_list_authors( $page , $revision , $cond ) {
		$fname = "do_list_authors" ;
		wfProfileIn( $fname );
		$this->author_list = "<contributors>";
		//rev_deleted
		$nothidden = '(rev_deleted & '.Revision::DELETED_USER.') = 0';

		$sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} " .
			"WHERE page_id=rev_page AND $nothidden AND " . $cond ;
		$result = $this->db->query( $sql, $fname );
		$resultset = $this->db->resultObject( $result );
		while( $row = $resultset->fetchObject() ) {
			# The fragment is embedded in an XML document, so escape only the
			# XML-significant characters. htmlentities() would also emit HTML
			# named entities (e.g. &euml;) which are undefined in XML.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text )  .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( $fname );
	}

	/**
	 * Builds and runs the dump query, then streams the rows to the sink.
	 * Produces log items when $this->history has the LOGS bit set, and
	 * page/revision records otherwise.
	 *
	 * @param $cond String: extra SQL condition, or '' for everything
	 * @return WikiError when the history specification is not usable;
	 *         nothing otherwise
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# An array is an offset/limit specification, never a bitmask. Check
		# for it before any bitwise test: older PHP versions coerce a
		# non-empty array to 1 (which would match FULL) and PHP 8 rejects
		# array operands with a TypeError.
		$historyIsArray = is_array( $this->history );
		# For logging dumps...
		if( !$historyIsArray && ( $this->history & self::LOGS ) ) {
			if( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if( $hideLogs ) $where[] = $hideLogs;
			# Add on any caller specified conditions
			if( $cond ) $where[] = $cond;
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName('logging');
			$result = $this->db->select( array('logging','user'),
				array( "{$logging}.*", 'user_name' ), // grab the user name
				$where,
				__METHOD__,
				array( 'ORDER BY' => 'log_id', 'USE INDEX' => array('logging' => 'PRIMARY') )
			);
			$wrapper = $this->db->resultObject( $result );
			if( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
			$this->outputLogStream( $wrapper );
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			# Time offset/limit for all pages/history...
			if( $historyIsArray ) {
				$revJoin = 'page_id=rev_page';
				# Set time order; anything other than 'asc' means descending
				if( isset( $this->history['dir'] ) && $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array('INNER JOIN',$revJoin);
				# Set query limit
				if( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			# Full history dumps...
			} elseif( $this->history & WikiExporter::FULL ) {
				$join['revision'] = array('INNER JOIN','page_id=rev_page');
			# Latest revision dumps...
			} elseif( $this->history & WikiExporter::CURRENT ) {
				if( $this->list_authors && $cond != '' )  { // List authors, if so desired
					list($page,$revision) = $this->db->tableNamesN('page','revision');
					$this->do_list_authors( $page, $revision, $cond );
				}
				$join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
			# "Stable" revision dumps...
			} elseif( $this->history & WikiExporter::STABLE ) {
				# Default JOIN, to be overridden...
				$join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
				# One, and only one hook should set this, and return false
				if( wfRunHooks( 'WikiExporter::dumpStableQuery', array(&$tables,&$opts,&$join) ) ) {
					wfProfileOut( __METHOD__ );
					return new WikiError( __METHOD__." given invalid history dump type." );
				}
			# Unknown history specification parameter?
			} else {
				wfProfileOut( __METHOD__ );
				return new WikiError( __METHOD__." given invalid history dump type." );
			}
			# Query optimization hacks
			if( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array('INNER JOIN','rev_text_id=old_id');
			}

			if( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			# Do the query!
			$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
			$wrapper = $this->db->resultObject( $result );
			# Output dump results. A single pass is enough: outputPageStream()
			# appends $this->author_list itself and frees the result set, so
			# a second call would only read from an already-freed result.
			$this->outputPageStream( $wrapper );

			if( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs through a query result set dumping page and revision records.
	 * The result set should be sorted/grouped by page to avoid duplicate
	 * page records in the output.
	 *
	 * The result set will be freed once complete. Should be safe for
	 * streaming (non-buffered) queries, as long as it was made on a
	 * separate database connection not managed by LoadBalancer; some
	 * blob storage types will make queries to pull source data.
	 *
	 * @param $resultset ResultWrapper
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		while( $row = $resultset->fetchObject() ) {
			# A change of namespace or title marks the start of a new page
			if( is_null( $last ) ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title     != $row->page_title ) {
				if( isset( $last ) ) {
					$output = '';
					if( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if( isset( $last ) ) {
			$output = '';
			if( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last );
			}
			# The author list is only attached to the final page of the set
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
		$resultset->free();
	}

	/**
	 * Runs through a query result set dumping one log item per row,
	 * then frees the result set.
	 *
	 * @param $resultset ResultWrapper
	 */
	protected function outputLogStream( $resultset ) {
		while( $row = $resultset->fetchObject() ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
		$resultset->free();
	}
}
339
340 /**
341  * @ingroup Dump
342  */
class XmlDumpWriter {

	/**
	 * Reports which version of the export schema this writer emits.
	 * @return string
	 */
	public function schemaVersion() {
		return "0.3"; // FIXME: upgrade to 0.4 when updated XSD is ready, for the revision deletion bits
	}

	/**
	 * Builds the opening <mediawiki> root tag followed by the <siteinfo>
	 * block. No XML declaration is emitted, so the result can be nested
	 * inside a larger XML stream. Namespace and XML Schema references
	 * are included.
	 *
	 * Output will be encoded in UTF-8.
	 *
	 * @return string
	 */
	public function openStream() {
		global $wgContLanguageCode;
		$ver = $this->schemaVersion();
		$rootAttribs = array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			                        "http://www.mediawiki.org/xml/export-$ver.xsd",
			'version'            => $ver,
			'xml:lang'           => $wgContLanguageCode );
		// Null contents: only the opening tag is wanted here
		$rootTag = Xml::element( 'mediawiki', $rootAttribs, null );
		return $rootTag . "\n" . $this->siteInfo();
	}

	/**
	 * Assembles the <siteinfo> block from the individual site fields.
	 * @return string
	 */
	public function siteInfo() {
		$fields = array();
		$fields[] = $this->sitename();
		$fields[] = $this->homelink();
		$fields[] = $this->generator();
		$fields[] = $this->caseSetting();
		$fields[] = $this->namespaces();
		$body = implode( "\n    ", $fields );
		return "  <siteinfo>\n    " . $body . "\n  </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element holding $wgSitename
	 */
	public function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element naming the MediaWiki version
	 */
	public function generator() {
		global $wgVersion;
		$label = "MediaWiki $wgVersion";
		return Xml::element( 'generator', array(), $label );
	}

	/**
	 * @return string <base> element holding the main page's full URL
	 */
	public function homelink() {
		$mainPageUrl = Title::newMainPage()->getFullUrl();
		return Xml::element( 'base', array(), $mainPageUrl );
	}

	/**
	 * @return string <case> element derived from $wgCapitalLinks
	 */
	public function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		if( $wgCapitalLinks ) {
			$sensitivity = 'first-letter';
		} else {
			$sensitivity = 'case-sensitive';
		}
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> per entry
	 *         of the content language's formatted namespace list
	 */
	public function namespaces() {
		global $wgContLang;
		$xml = "<namespaces>\n";
		foreach( $wgContLang->getFormattedNamespaces() as $index => $label ) {
			$entry = Xml::element( 'namespace', array( 'key' => $index ), $label );
			$xml .= '      ' . $entry . "\n";
		}
		return $xml . "    </namespaces>";
	}

	/**
	 * Returns the closing tag of the root element.
	 * Call when finished dumping things.
	 * @return string
	 */
	public function closeStream() {
		return "</mediawiki>\n";
	}


	/**
	 * Starts a <page> section: title, id and, when non-empty, the
	 * restrictions taken from the given database row.
	 *
	 * @param $row object
	 * @return string
	 * @access private
	 */
	public function openPage( $row ) {
		$pageTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$xml  = "  <page>\n";
		$xml .= '    ' . Xml::elementClean( 'title', array(), $pageTitle->getPrefixedText() ) . "\n";
		$xml .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if( $row->page_restrictions != '' ) {
			$restrictions = strval( $row->page_restrictions );
			$xml .= '    ' . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
		}
		return $xml;
	}

	/**
	 * Returns the closing tag of a <page> section.
	 *
	 * @return string
	 * @access private
	 */
	public function closePage() {
		return "  </page>\n";
	}

	/**
	 * Builds a <revision> section from the given database row.
	 * Fields flagged in rev_deleted are replaced by empty elements
	 * carrying a deleted="deleted" attribute.
	 *
	 * @param $row object
	 * @return string
	 * @access private
	 */
	public function writeRevision( $row ) {
		$profileKey = 'WikiExporter::dumpRev';
		wfProfileIn( $profileKey );

		$pad = "      ";
		$hidden = array( 'deleted' => 'deleted' );

		$xml  = "    <revision>\n";
		$xml .= $pad . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		$xml .= $this->writeTimestamp( $row->rev_timestamp );

		// Contributor
		$xml .= ( $row->rev_deleted & Revision::DELETED_USER )
			? $pad . Xml::element( 'contributor', $hidden ) . "\n"
			: $this->writeContributor( $row->rev_user, $row->rev_user_text );

		if( $row->rev_minor_edit ) {
			$xml .=  "      <minor/>\n";
		}

		// Edit summary; omitted entirely when empty and not hidden
		if( $row->rev_deleted & Revision::DELETED_COMMENT ) {
			$xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
		} elseif( $row->rev_comment != '' ) {
			$xml .= $pad . Xml::elementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
		}

		// Text: hidden marker, full text, or a stub pointing at the text id
		if( $row->rev_deleted & Revision::DELETED_TEXT ) {
			$xml .= $pad . Xml::element( 'text', $hidden ) . "\n";
		} elseif( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$body = strval( Revision::getRevisionText( $row ) );
			$xml .= $pad . Xml::elementClean( 'text', array( 'xml:space' => 'preserve' ), $body ) . "\n";
		} else {
			// Stub output
			$xml .= $pad . Xml::element( 'text', array( 'id' => $row->rev_text_id ), "" ) . "\n";
		}

		$xml .= "    </revision>\n";

		wfProfileOut( $profileKey );
		return $xml;
	}

	/**
	 * Builds a <logitem> section from the given database row.
	 * Fields flagged in log_deleted are replaced by empty elements
	 * carrying a deleted="deleted" attribute.
	 *
	 * @param $row object
	 * @return string
	 * @access private
	 */
	public function writeLogItem( $row ) {
		$profileKey = 'WikiExporter::writeLogItem';
		wfProfileIn( $profileKey );

		$pad = "      ";
		$hidden = array( 'deleted' => 'deleted' );

		$xml  = "    <logitem>\n";
		$xml .= $pad . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
		$xml .= $this->writeTimestamp( $row->log_timestamp );

		// Performer
		$xml .= ( $row->log_deleted & LogPage::DELETED_USER )
			? $pad . Xml::element( 'contributor', $hidden ) . "\n"
			: $this->writeContributor( $row->log_user, $row->user_name );

		// Comment; omitted entirely when empty and not hidden
		if( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
		} elseif( $row->log_comment != '' ) {
			$xml .= $pad . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$xml .= $pad . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$xml .= $pad . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		// Target and parameters, unless the action itself is hidden
		if( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$xml .= $pad . Xml::element( 'text', $hidden ) . "\n";
		} else {
			$target = Title::makeTitle( $row->log_namespace, $row->log_title );
			$xml .= $pad . Xml::elementClean( 'logtitle', null, $target->getPrefixedText() ) . "\n";
			$xml .= $pad . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$xml .= "    </logitem>\n";

		wfProfileOut( $profileKey );
		return $xml;
	}

	/**
	 * Builds a <timestamp> element with the value in ISO 8601 form.
	 *
	 * @param $timestamp mixed: passed through wfTimestamp()
	 * @return string
	 */
	public function writeTimestamp( $timestamp ) {
		$iso = wfTimestamp( TS_ISO_8601, $timestamp );
		$element = Xml::element( 'timestamp', null, $iso );
		return "      " . $element . "\n";
	}

	/**
	 * Builds a <contributor> element: username plus id when $id is
	 * truthy, otherwise the text is emitted as an <ip> element.
	 *
	 * @param $id mixed: user id
	 * @param $text string: user name or IP text
	 * @return string
	 */
	public function writeContributor( $id, $text ) {
		$inner = "        ";
		$xml = "      <contributor>\n";
		if( !$id ) {
			$xml .= $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		} else {
			$xml .= $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$xml .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
		}
		return $xml . "      </contributor>\n";
	}

	/**
	 * Builds <upload> elements for every version of the file behind a
	 * page in the NS_IMAGE namespace: history entries in reversed
	 * order first, then the file object returned by wfFindFile().
	 * Returns '' for other namespaces or when no file is found.
	 *
	 * Warning! This data is potentially inconsistent. :(
	 *
	 * @param $row object
	 * @return string
	 */
	public function writeUploads( $row ) {
		if( $row->page_namespace != NS_IMAGE ) {
			return '';
		}
		$img = wfFindFile( $row->page_title );
		if( !$img ) {
			return '';
		}
		$xml = '';
		foreach( array_reverse( $img->getHistory() ) as $oldVersion ) {
			$xml .= $this->writeUpload( $oldVersion );
		}
		return $xml . $this->writeUpload( $img );
	}

	/**
	 * Builds a single <upload> element describing one file version.
	 *
	 * @param $file object providing getTimestamp(), getUser(),
	 *              getDescription(), getName(), getFullUrl() and getSize()
	 * @return string
	 */
	public function writeUpload( $file ) {
		$pad = "      ";
		$xml  = "    <upload>\n";
		$xml .= $this->writeTimestamp( $file->getTimestamp() );
		$xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
		$xml .= $pad . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n";
		$xml .= $pad . Xml::element( 'filename', null, $file->getName() ) . "\n";
		$xml .= $pad . Xml::element( 'src', null, $file->getFullUrl() ) . "\n";
		$xml .= $pad . Xml::element( 'size', null, $file->getSize() ) . "\n";
		$xml .= "    </upload>\n";
		return $xml;
	}

}
607
608
609 /**
610  * Base class for output stream; prints to stdout or buffer or wherever.
611  * @ingroup Dump
612  */
class DumpOutput {
	/**
	 * Receives the XML that opens the stream.
	 * @param $string string
	 */
	public function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * Receives the XML that closes the stream.
	 * @param $string string
	 */
	public function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * Receives the XML that opens a page; the row itself is not used here.
	 * @param $page object
	 * @param $string string
	 */
	public function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * Receives the XML that closes a page.
	 * @param $string string
	 */
	public function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * Receives the XML for one revision; the row itself is not used here.
	 * @param $rev object
	 * @param $string string
	 */
	public function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Receives the XML for one log item; the row itself is not used here.
	 * @param $rev object
	 * @param $string string
	 */
	public function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Emits the string on standard output.
	 * Override to write to a different stream type.
	 * @param $string string
	 */
	public function write( $string ) {
		echo $string;
	}
}
646
647 /**
648  * Stream outputter to send data to a file.
649  * @ingroup Dump
650  */
class DumpFileOutput extends DumpOutput {
	/** File handle that write() sends its data to */
	var $handle;

	/**
	 * Opens the target file for writing in text mode.
	 * @param $file string: path of the file to write
	 */
	public function DumpFileOutput( $file ) {
		$mode = "wt";
		$this->handle = fopen( $file, $mode );
	}

	/**
	 * Appends the string to the open handle.
	 * @param $string string
	 */
	public function write( $string ) {
		fwrite( $this->handle, $string );
	}
}
662
663 /**
664  * Stream outputter to send data to a file via some filter program.
665  * Even if compression is available in a library, using a separate
666  * program can allow us to make use of a multi-processor system.
667  * @ingroup Dump
668  */
class DumpPipeOutput extends DumpFileOutput {
	/**
	 * Starts the filter program and keeps a write pipe to it as the
	 * output handle.
	 *
	 * @param $command string: shell command to pipe the dump into
	 * @param $file string|null: when given, the command's output is
	 *              redirected to this (shell-escaped) path
	 */
	public function DumpPipeOutput( $command, $file = null ) {
		$pipeline = $command;
		if( $file !== null ) {
			$pipeline = $command . " > " . wfEscapeShellArg( $file );
		}
		$this->handle = popen( $pipeline, "w" );
	}
}
677
678 /**
679  * Sends dump output via the gzip compressor.
680  * @ingroup Dump
681  */
class DumpGZipOutput extends DumpPipeOutput {
	/**
	 * Pipes the dump through the "gzip" command into the given file.
	 * @param $file string: destination path
	 */
	public function DumpGZipOutput( $file ) {
		$compressor = "gzip";
		parent::DumpPipeOutput( $compressor, $file );
	}
}
687
688 /**
689  * Sends dump output via the bzip2 compressor.
690  * @ingroup Dump
691  */
class DumpBZip2Output extends DumpPipeOutput {
	/**
	 * Pipes the dump through the "bzip2" command into the given file.
	 * @param $file string: destination path
	 */
	public function DumpBZip2Output( $file ) {
		$compressor = "bzip2";
		parent::DumpPipeOutput( $compressor, $file );
	}
}
697
/**
 * Sends dump output via the p7zip compressor.
 * @ingroup Dump
 */
class Dump7ZipOutput extends DumpPipeOutput {
        /**
         * Build the 7za command line for $file and open a pipe to it.
         *
         * Unlike the gzip and bzip2 writers, the archive name is given to
         * the program as an argument rather than through a shell redirect.
         *
         * @param $file String: path of the archive to produce
         */
        function __construct( $file ) {
                $command = "7za a -bd -si " . wfEscapeShellArg( $file );
                // Discard p7zip's own console output.
                // Unfortunately this could suppress real error messages too.
                $command .= ' >' . wfGetNull() . ' 2>&1';
                // Go through the parent's class-named initializer: it is
                // callable as a plain method on every PHP version.
                parent::DumpPipeOutput( $command );
        }

        /**
         * Class-named initializer, kept for code that invokes it by name.
         * PHP 8 no longer treats it as the constructor, so it only
         * delegates to __construct().
         *
         * @param $file String: path of the archive to produce
         */
        function Dump7ZipOutput( $file ) {
                self::__construct( $file );
        }
}
711
712
713
/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do.
 *
 * Every write call is forwarded to the wrapped sink. Page-level calls are
 * forwarded only while the current page has been accepted by pass().
 * @ingroup Dump
 */
class DumpFilter {
        /** Downstream writer receiving whatever this filter lets through */
        var $sink;

        /** True while the page being streamed has been accepted by pass() */
        var $sendingThisPage = false;

        /**
         * @param $sink Object: writer to forward output to
         */
        function __construct( &$sink ) {
                // Plain assignment on purpose: binding by reference would tie
                // the property to the caller's variable, so reassigning that
                // variable later would silently swap the sink.
                $this->sink = $sink;
        }

        /**
         * Class-named initializer, kept because subclasses and older code
         * invoke it by name. PHP 8 no longer treats a method named after
         * its class as the constructor, hence the real work lives in
         * __construct().
         *
         * @param $sink Object: writer to forward output to
         */
        function DumpFilter( &$sink ) {
                // self:: (not $this->) so that a subclass constructor with a
                // different signature is never picked up by accident.
                self::__construct( $sink );
        }

        /**
         * @param $string String: text opening the stream
         */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /**
         * @param $string String: text closing the stream
         */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Ask pass() whether the page is wanted and, if so, forward its
         * opening text.
         *
         * @param $page Object: page data handed to pass()
         * @param $string String: text opening the page
         */
        function writeOpenPage( $page, $string ) {
                // The extra argument is ignored by pass() implementations
                // that only declare $page.
                $this->sendingThisPage = $this->pass( $page, $string );
                if( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * Forward the page's closing text if the page was being sent.
         *
         * @param $string String: text closing the page
         */
        function writeClosePage( $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }

        /**
         * Forward a revision if the current page is being sent.
         *
         * @param $rev Object: revision data
         * @param $string String: formatted revision text
         */
        function writeRevision( $rev, $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forward a log item. Log items are not tied to a page, so they are
         * never held back by the page filter.
         *
         * @param $rev Object: log data
         * @param $string String: formatted log item text
         */
        function writeLogItem( $rev, $string ) {
                if( method_exists( $this->sink, 'writeLogItem' ) ) {
                        // Use the sink's log entry point: its writeRevision()
                        // may be gated on an open page and drop the item.
                        $this->sink->writeLogItem( $rev, $string );
                } else {
                        // Sink without a dedicated log method: fall back to
                        // the revision path.
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Override for page-based filter types.
         *
         * @param $page Object: page data
         * @return bool true to send the page on, false to drop it
         */
        function pass( $page ) {
                return true;
        }
}
765
/**
 * Simple dump output filter that drops every page living in a talk
 * namespace.
 * @ingroup Dump
 */
class DumpNotalkFilter extends DumpFilter {
        /**
         * @param $page Object: page data; its page_namespace field is read
         * @return bool false for talk pages, true for everything else
         */
        function pass( $page ) {
                $isTalk = MWNamespace::isTalk( $page->page_namespace );
                return !$isTalk;
        }
}
775
/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 *
 * The filter parameter is a comma-separated list of keys. Each key is
 * either one of the NS_* constant names listed in the constructor or a
 * numeric namespace index. A leading "!" inverts the filter, so that the
 * listed namespaces are excluded instead of included.
 * @ingroup Dump
 */
class DumpNamespaceFilter extends DumpFilter {
        /** True when the listed namespaces are to be excluded */
        var $invert = false;

        /** Selected namespaces, as array( namespace index => true ) */
        var $namespaces = array();

        /**
         * @param $sink Object: writer to forward output to
         * @param $param String: namespace list, optionally prefixed with "!"
         * @throws MWException on a key that is neither a known constant
         *   name nor numeric
         */
        function __construct( &$sink, $param ) {
                // Go through the parent's class-named initializer: it is
                // callable as a plain method on every PHP version.
                parent::DumpFilter( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // Check for emptiness before indexing: reading offset 0 of an
                // empty string raises an error-level message. Square
                // brackets are used because the curly-brace offset syntax no
                // longer parses on PHP 8.
                $param = strval( $param );
                if( $param !== '' && $param[0] === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                        } elseif( is_numeric( $key ) ) {
                                $ns = intval( $key );
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                        $this->namespaces[$ns] = true;
                }
        }

        /**
         * Class-named initializer, kept for code that invokes it by name.
         * PHP 8 no longer treats it as the constructor, so it only
         * delegates to __construct().
         *
         * @param $sink Object: writer to forward output to
         * @param $param String: namespace list, optionally prefixed with "!"
         */
        function DumpNamespaceFilter( &$sink, $param ) {
                self::__construct( $sink, $param );
        }

        /**
         * @param $page Object: page data; its page_namespace field is read
         * @return bool true if the page's namespace is selected (or, when
         *   inverted, not selected)
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
831
832
/**
 * Dump output filter to include only the last revision in each page sequence.
 *
 * Nothing is sent while a page is being streamed: the page header and the
 * revision whose id equals the page's page_latest are held back and
 * flushed together when the page is closed.
 * @ingroup Dump
 */
class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;

        /**
         * Remember the page header instead of sending it.
         *
         * @param $page Object: page data; page_latest is read later
         * @param $string String: text opening the page
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Flush the held page header, revision and closing text if a latest
         * revision was seen, then forget everything held for this page.
         *
         * @param $string String: text closing the page
         */
        function writeClosePage( $string ) {
                if( $this->rev ) {
                        $out = $this->sink;
                        $out->writeOpenPage( $this->page, $this->pageString );
                        $out->writeRevision( $this->rev, $this->revString );
                        $out->writeClosePage( $string );
                }
                $this->rev = $this->revString = null;
                $this->page = $this->pageString = null;
        }

        /**
         * Hold on to the revision only if it is the page's latest one.
         *
         * @param $rev Object: revision data; rev_id is read
         * @param $string String: formatted revision text
         */
        function writeRevision( $rev, $string ) {
                if( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->rev = $rev;
                $this->revString = $string;
        }
}
864
/**
 * Fan-out writer: repeats every write call on each of a list of sinks, so
 * that one dump run can feed several outputs at once.
 * @ingroup Dump
 */
class DumpMultiWriter {
        /** Array of writers that receive every call */
        var $sinks = array();

        /** Number of entries in $sinks */
        var $count = 0;

        /**
         * @param $sinks Array: writers to forward every call to
         */
        function __construct( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * Class-named initializer, kept for code that invokes it by name.
         * PHP 8 no longer treats a method named after its class as the
         * constructor, hence the real work lives in __construct().
         *
         * @param $sinks Array: writers to forward every call to
         */
        function DumpMultiWriter( $sinks ) {
                self::__construct( $sinks );
        }

        /**
         * @param $string String: text opening the stream
         */
        function writeOpenStream( $string ) {
                // foreach rather than a 0..count-1 index loop, so that an
                // array with arbitrary keys is handled as well.
                foreach( $this->sinks as $sink ) {
                        $sink->writeOpenStream( $string );
                }
        }

        /**
         * @param $string String: text closing the stream
         */
        function writeCloseStream( $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeCloseStream( $string );
                }
        }

        /**
         * @param $page Object: page data
         * @param $string String: text opening the page
         */
        function writeOpenPage( $page, $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string String: text closing the page
         */
        function writeClosePage( $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeClosePage( $string );
                }
        }

        /**
         * @param $rev Object: revision data
         * @param $string String: formatted revision text
         */
        function writeRevision( $rev, $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forward a log item to every sink. Without this method a log dump
         * sent through this writer would end in a call to an undefined
         * method.
         *
         * @param $rev Object: log data
         * @param $string String: formatted log item text
         */
        function writeLogItem( $rev, $string ) {
                foreach( $this->sinks as $sink ) {
                        $sink->writeLogItem( $rev, $string );
                }
        }
}
905
/**
 * Make a string safe for inclusion in the XML output.
 *
 * @param $string String: raw text
 * @return String: cleaned-up text with markup characters escaped by
 *   htmlspecialchars()
 */
function xmlsafe( $string ) {
        wfProfileIn( __FUNCTION__ );

        /**
         * Old page data may not have been properly normalized. Invalid
         * UTF-8 sequences or forbidden control characters would make the
         * XML output invalid, so they are stripped before escaping.
         */
        $cleaned = UtfNormal::cleanUp( $string );
        $escaped = htmlspecialchars( $cleaned );

        wfProfileOut( __FUNCTION__ );
        return $escaped;
}