]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/Export.php
MediaWiki 1.16.4
[autoinstalls/mediawiki.git] / includes / Export.php
1 <?php
2 # Copyright (C) 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21  * @defgroup Dump Dump
22  */
23
24 /**
25  * @ingroup SpecialPage Dump
26  */
class WikiExporter {
        /** Return a distinct author list (only used when not returning full history) */
        public $list_authors = false;
        /** "<contributors>" XML fragment built by do_list_authors() */
        public $author_list = "";

        /** When true, upload records are written just before each page is closed */
        public $dumpUploads = false;

        /** Database connection used for every query (stored by reference) */
        public $db;
        /** History selector: one of the FULL/CURRENT/STABLE/LOGS flags, or an offset/limit/dir array */
        public $history;
        /** One of WikiExporter::BUFFER or WikiExporter::STREAM */
        public $buffer;
        /** XmlDumpWriter turning result rows into XML fragments */
        public $writer;
        /** DumpOutput or DumpFilter receiving the rows and their XML fragments */
        public $sink;
        /** One of WikiExporter::TEXT or WikiExporter::STUB */
        public $text;

        const FULL = 1;
        const CURRENT = 2;
        const STABLE = 4; // extension defined
        const LOGS = 8;

        const BUFFER = 0;
        const STREAM = 1;

        const TEXT = 0;
        const STUB = 1;

        /**
         * If using WikiExporter::STREAM to stream a large amount of data,
         * provide a database connection which is not managed by
         * LoadBalancer to read from: some history blob types will
         * make additional queries to pull source data while the
         * main query is still running.
         *
         * @param $db Database
         * @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT,
         *                 or an associative array:
         *                   offset: non-inclusive offset at which to start the query
         *                   limit: maximum number of rows to return
         *                   dir: "asc" or "desc" timestamp order
         * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM
         * @param $text Int: one of WikiExporter::TEXT or WikiExporter::STUB
         */
        function __construct( &$db, $history = WikiExporter::CURRENT,
                        $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
                $this->db =& $db;
                $this->history = $history;
                $this->buffer  = $buffer;
                $this->writer  = new XmlDumpWriter();
                $this->sink    = new DumpOutput();
                $this->text    = $text;
        }

        /**
         * Set the DumpOutput or DumpFilter object which will receive
         * various row objects and XML output for filtering. Filters
         * can be chained or used as callbacks.
         *
         * @param $sink mixed
         */
        public function setOutputSink( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * Sends the writer's stream-opening XML to the sink.
         */
        public function openStream() {
                $output = $this->writer->openStream();
                $this->sink->writeOpenStream( $output );
        }

        /**
         * Sends the writer's stream-closing XML to the sink.
         */
        public function closeStream() {
                $output = $this->writer->closeStream();
                $this->sink->writeCloseStream( $output );
        }

        /**
         * Dumps a series of page and revision records for all pages
         * in the database, either including complete history or only
         * the most recent version.
         */
        public function allPages() {
                return $this->dumpFrom( '' );
        }

        /**
         * Dumps a series of page and revision records for those pages
         * in the database falling within the page_id range given.
         * @param $start Int: inclusive lower limit (this id is included)
         * @param $end   Int: Exclusive upper limit (this id is not included)
         *                   If 0, no upper limit.
         */
        public function pagesByRange( $start, $end ) {
                $condition = 'page_id >= ' . intval( $start );
                if( $end ) {
                        $condition .= ' AND page_id < ' . intval( $end );
                }
                return $this->dumpFrom( $condition );
        }

        /**
         * Dumps the single page identified by the given title.
         * @param $title Title
         */
        public function pageByTitle( $title ) {
                return $this->dumpFrom(
                        'page_namespace=' . $title->getNamespace() .
                        ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
        }

        /**
         * Dumps the single page with the given name.
         * @param $name String: page name, parsed with Title::newFromText()
         * @return mixed a WikiError when the name is not a valid title,
         *               otherwise whatever pageByTitle() returns
         */
        public function pageByName( $name ) {
                $title = Title::newFromText( $name );
                if( is_null( $title ) ) {
                        return new WikiError( "Can't export invalid title" );
                } else {
                        return $this->pageByTitle( $title );
                }
        }

        /**
         * Dumps each of the named pages in turn. The result of each
         * pageByName() call is discarded, so invalid names are skipped
         * without being reported.
         * @param $names Array of page names
         */
        public function pagesByName( $names ) {
                foreach( $names as $name ) {
                        $this->pageByName( $name );
                }
        }

        /**
         * Dumps with no extra condition; intended for exporters created
         * with WikiExporter::LOGS.
         */
        public function allLogs() {
                return $this->dumpFrom( '' );
        }

        /**
         * Dumps log entries falling within the log_id range given.
         * @param $start Int: inclusive lower limit
         * @param $end   Int: exclusive upper limit; if 0, no upper limit
         */
        public function logsByRange( $start, $end ) {
                $condition = 'log_id >= ' . intval( $start );
                if( $end ) {
                        $condition .= ' AND log_id < ' . intval( $end );
                }
                return $this->dumpFrom( $condition );
        }

        /**
         * Generates the distinct list of authors of an article and stores
         * it as an XML fragment in $this->author_list.
         * Not called by default (depends on $this->list_authors).
         * Can be set by Special:Export when not exporting whole history.
         *
         * @param $page String: page table name, as used in the FROM clause
         * @param $revision String: revision table name, as used in the FROM clause
         * @param $cond String: SQL condition selecting the page(s)
         */
        protected function do_list_authors( $page , $revision , $cond ) {
                $fname = "do_list_authors" ;
                wfProfileIn( $fname );
                $this->author_list = "<contributors>";
                // Leave out revisions whose user name is hidden (rev_deleted)
                $nothidden = '(' . $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ') = 0';

                $sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} " .
                        "WHERE page_id=rev_page AND $nothidden AND " . $cond;
                $result = $this->db->query( $sql, $fname );
                $resultset = $this->db->resultObject( $result );
                while( $row = $resultset->fetchObject() ) {
                        // htmlspecialchars(), not htmlentities(): the output is XML, where
                        // HTML named entities such as &eacute; are undefined.
                        $this->author_list .= "<contributor>" .
                                "<username>" .
                                htmlspecialchars( $row->rev_user_text ) .
                                "</username>" .
                                "<id>" .
                                $row->rev_user .
                                "</id>" .
                                "</contributor>";
                }
                $this->author_list .= "</contributors>";
                wfProfileOut( $fname );
        }

        /**
         * Runs the dump query matching $this->history and streams the
         * result to the sink.
         *
         * @param $cond String: extra SQL condition, or '' for none
         * @return mixed a WikiError when the history specification cannot
         *               be handled, otherwise nothing
         */
        protected function dumpFrom( $cond = '' ) {
                wfProfileIn( __METHOD__ );
                # For logging dumps...
                # An array-valued history spec always means a page dump; the bit
                # test is only meaningful (and only valid on PHP 8) for integers.
                if( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
                        if( $this->buffer == WikiExporter::STREAM ) {
                                $prev = $this->db->bufferResults( false );
                        }
                        $where = array( 'user_id = log_user' );
                        # Hide private logs
                        $hideLogs = LogEventsList::getExcludeClause( $this->db );
                        if( $hideLogs ) $where[] = $hideLogs;
                        # Add on any caller specified conditions
                        if( $cond ) $where[] = $cond;
                        # Get logging table name for logging.* clause
                        $logging = $this->db->tableName('logging');
                        $result = $this->db->select( array('logging','user'),
                                array( "{$logging}.*", 'user_name' ), // grab the user name
                                $where,
                                __METHOD__,
                                array( 'ORDER BY' => 'log_id', 'USE INDEX' => array('logging' => 'PRIMARY') )
                        );
                        $wrapper = $this->db->resultObject( $result );
                        $this->outputLogStream( $wrapper );
                        if( $this->buffer == WikiExporter::STREAM ) {
                                $this->db->bufferResults( $prev );
                        }
                # For page dumps...
                } else {
                        $tables = array( 'page', 'revision' );
                        $opts = array( 'ORDER BY' => 'page_id ASC' );
                        $opts['USE INDEX'] = array();
                        $join = array();
                        if( is_array( $this->history ) ) {
                                # Time offset/limit for all pages/history...
                                $revJoin = 'page_id=rev_page';
                                # Set time order; a missing 'dir' means descending
                                if( isset( $this->history['dir'] ) && $this->history['dir'] == 'asc' ) {
                                        $op = '>';
                                        $opts['ORDER BY'] = 'rev_timestamp ASC';
                                } else {
                                        $op = '<';
                                        $opts['ORDER BY'] = 'rev_timestamp DESC';
                                }
                                # Set offset
                                if( !empty( $this->history['offset'] ) ) {
                                        $revJoin .= " AND rev_timestamp $op " .
                                                $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
                                }
                                $join['revision'] = array('INNER JOIN',$revJoin);
                                # Set query limit
                                if( !empty( $this->history['limit'] ) ) {
                                        $opts['LIMIT'] = intval( $this->history['limit'] );
                                }
                        } elseif( $this->history & WikiExporter::FULL ) {
                                # Full history dumps...
                                $join['revision'] = array('INNER JOIN','page_id=rev_page');
                        } elseif( $this->history & WikiExporter::CURRENT ) {
                                # Latest revision dumps...
                                if( $this->list_authors && $cond != '' )  { // List authors, if so desired
                                        list($page,$revision) = $this->db->tableNamesN('page','revision');
                                        $this->do_list_authors( $page, $revision, $cond );
                                }
                                $join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
                        } elseif( $this->history & WikiExporter::STABLE ) {
                                # "Stable" revision dumps...
                                # Default JOIN, to be overridden...
                                $join['revision'] = array('INNER JOIN','page_id=rev_page AND page_latest=rev_id');
                                # One, and only one hook should set this, and return false
                                if( wfRunHooks( 'WikiExporter::dumpStableQuery', array(&$tables,&$opts,&$join) ) ) {
                                        wfProfileOut( __METHOD__ );
                                        return new WikiError( __METHOD__." given invalid history dump type." );
                                }
                        } else {
                                # Unknown history specification parameter?
                                wfProfileOut( __METHOD__ );
                                return new WikiError( __METHOD__." given invalid history dump type." );
                        }
                        # Query optimization hacks
                        if( $cond == '' ) {
                                $opts[] = 'STRAIGHT_JOIN';
                                $opts['USE INDEX']['page'] = 'PRIMARY';
                        }
                        # Build text join options
                        if( $this->text != WikiExporter::STUB ) { // 1-pass
                                $tables[] = 'text';
                                $join['text'] = array('INNER JOIN','rev_text_id=old_id');
                        }

                        if( $this->buffer == WikiExporter::STREAM ) {
                                $prev = $this->db->bufferResults( false );
                        }

                        wfRunHooks( 'ModifyExportQuery',
                                                array( $this->db, &$tables, &$cond, &$opts, &$join ) );

                        # Do the query!
                        $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
                        $wrapper = $this->db->resultObject( $result );
                        # Output dump results. A single pass is enough: the author
                        # list, when present, is appended by outputPageStream() itself,
                        # and a second pass over the exhausted result would emit nothing.
                        $this->outputPageStream( $wrapper );

                        if( $this->buffer == WikiExporter::STREAM ) {
                                $this->db->bufferResults( $prev );
                        }
                }
                wfProfileOut( __METHOD__ );
        }

        /**
         * Runs through a query result set dumping page and revision records.
         * The result set should be sorted/grouped by page to avoid duplicate
         * page records in the output.
         *
         * The result set is read to the end but is not freed here. Should be
         * safe for streaming (non-buffered) queries, as long as it was made on
         * a separate database connection not managed by LoadBalancer; some
         * blob storage types will make queries to pull source data.
         *
         * $this->author_list is appended only when the final page is closed.
         *
         * @param $resultset ResultWrapper
         */
        protected function outputPageStream( $resultset ) {
                $last = null;
                while( $row = $resultset->fetchObject() ) {
                        if( is_null( $last ) ||
                                $last->page_namespace != $row->page_namespace ||
                                $last->page_title     != $row->page_title ) {
                                // A new page starts here: close the previous one, if any
                                if( isset( $last ) ) {
                                        $output = '';
                                        if( $this->dumpUploads ) {
                                                $output .= $this->writer->writeUploads( $last );
                                        }
                                        $output .= $this->writer->closePage();
                                        $this->sink->writeClosePage( $output );
                                }
                                $output = $this->writer->openPage( $row );
                                $this->sink->writeOpenPage( $row, $output );
                                $last = $row;
                        }
                        $output = $this->writer->writeRevision( $row );
                        $this->sink->writeRevision( $row, $output );
                }
                if( isset( $last ) ) {
                        $output = '';
                        if( $this->dumpUploads ) {
                                $output .= $this->writer->writeUploads( $last );
                        }
                        $output .= $this->author_list;
                        $output .= $this->writer->closePage();
                        $this->sink->writeClosePage( $output );
                }
        }

        /**
         * Runs through a query result set, writing one log item per row.
         *
         * @param $resultset ResultWrapper
         */
        protected function outputLogStream( $resultset ) {
                while( $row = $resultset->fetchObject() ) {
                        $output = $this->writer->writeLogItem( $row );
                        $this->sink->writeLogItem( $row, $output );
                }
        }
}
341
342 /**
343  * @ingroup Dump
344  */
class XmlDumpWriter {

        /**
         * Returns the export schema version.
         * @return string
         */
        function schemaVersion() {
                return "0.4";
        }

        /**
         * Opens the XML output stream's root <mediawiki> element.
         * This does not include an xml directive, so is safe to include
         * as a subelement in a larger XML stream. Namespace and XML Schema
         * references are included.
         *
         * Output will be encoded in UTF-8.
         *
         * @return string
         */
        function openStream() {
                global $wgContLanguageCode;
                $ver = $this->schemaVersion();
                return Xml::element( 'mediawiki', array(
                        'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
                        'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
                        'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
                                                "http://www.mediawiki.org/xml/export-$ver.xsd",
                        'version'            => $ver,
                        'xml:lang'           => $wgContLanguageCode ),
                        null ) .
                        "\n" .
                        $this->siteInfo();
        }

        /**
         * Builds the <siteinfo> block: site name, base URL, generator,
         * case setting and namespace list, in that order.
         * @return string
         */
        function siteInfo() {
                $info = array(
                        $this->sitename(),
                        $this->homelink(),
                        $this->generator(),
                        $this->caseSetting(),
                        $this->namespaces() );
                return "  <siteinfo>\n    " .
                        implode( "\n    ", $info ) .
                        "\n  </siteinfo>\n";
        }

        /**
         * @return string <sitename> element holding $wgSitename
         */
        function sitename() {
                global $wgSitename;
                return Xml::element( 'sitename', array(), $wgSitename );
        }

        /**
         * @return string <generator> element naming the MediaWiki version
         */
        function generator() {
                global $wgVersion;
                return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
        }

        /**
         * @return string <base> element holding the full URL of the main page
         */
        function homelink() {
                return Xml::element( 'base', array(), Title::newMainPage()->getFullUrl() );
        }

        /**
         * @return string <case> element derived from $wgCapitalLinks
         */
        function caseSetting() {
                global $wgCapitalLinks;
                // "case-insensitive" option is reserved for future
                $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
                return Xml::element( 'case', array(), $sensitivity );
        }

        /**
         * Builds the <namespaces> block with one <namespace> element per
         * entry returned by $wgContLang->getFormattedNamespaces().
         * @return string
         */
        function namespaces() {
                global $wgContLang;
                $spaces = "<namespaces>\n";
                foreach( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
                        $spaces .= '      ' .
                                Xml::element( 'namespace',
                                        array(  'key' => $ns,
                                                        'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
                                        ), $title ) . "\n";
                }
                $spaces .= "    </namespaces>";
                return $spaces;
        }

        /**
         * Closes the output stream with the closing root element.
         * Call when finished dumping things.
         * @return string
         */
        function closeStream() {
                return "</mediawiki>\n";
        }


        /**
         * Opens a <page> section on the output stream, with data
         * from the given database row.
         *
         * @param $row object
         * @return string
         */
        function openPage( $row ) {
                $out = "  <page>\n";
                $title = Title::makeTitle( $row->page_namespace, $row->page_title );
                $out .= '    ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
                $out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
                if( $row->page_is_redirect ) {
                        $out .= '    ' . Xml::element( 'redirect', array() ) . "\n";
                }
                if( $row->page_restrictions != '' ) {
                        $out .= '    ' . Xml::element( 'restrictions', array(),
                                strval( $row->page_restrictions ) ) . "\n";
                }

                wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

                return $out;
        }

        /**
         * Closes a <page> section on the output stream.
         * @return string
         */
        function closePage() {
                return "  </page>\n";
        }

        /**
         * Dumps a <revision> section on the output stream, with
         * data filled in from the given database row.
         *
         * Contributor, comment and text are each replaced by an empty
         * element marked deleted="deleted" when the matching rev_deleted
         * bit is set.
         *
         * @param $row object
         * @return string
         */
        function writeRevision( $row ) {
                $fname = 'WikiExporter::dumpRev';
                wfProfileIn( $fname );

                $out  = "    <revision>\n";
                $out .= "      " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";

                $out .= $this->writeTimestamp( $row->rev_timestamp );

                if( $row->rev_deleted & Revision::DELETED_USER ) {
                        $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
                } else {
                        $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
                }

                if( $row->rev_minor_edit ) {
                        $out .=  "      <minor/>\n";
                }
                if( $row->rev_deleted & Revision::DELETED_COMMENT ) {
                        $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
                } elseif( $row->rev_comment != '' ) {
                        $out .= "      " . Xml::elementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
                }

                $text = '';
                if( $row->rev_deleted & Revision::DELETED_TEXT ) {
                        $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
                } elseif( isset( $row->old_text ) ) {
                        // Raw text from the database may have invalid chars
                        $text = strval( Revision::getRevisionText( $row ) );
                        $out .= "      " . Xml::elementClean( 'text',
                                array( 'xml:space' => 'preserve' ),
                                $text ) . "\n";
                } else {
                        // Stub output
                        $out .= "      " . Xml::element( 'text',
                                array( 'id' => $row->rev_text_id ),
                                "" ) . "\n";
                }

                // Hand the hook a reference to a plain variable rather than to
                // $this itself: referencing $this directly is not reliable on
                // PHP 7.1+, while handlers declaring the argument by reference
                // still receive one this way.
                $writer = $this;
                wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

                $out .= "    </revision>\n";

                wfProfileOut( $fname );
                return $out;
        }

        /**
         * Dumps a <logitem> section on the output stream, with
         * data filled in from the given database row.
         *
         * @param $row object
         * @return string
         */
        function writeLogItem( $row ) {
                $fname = 'WikiExporter::writeLogItem';
                wfProfileIn( $fname );

                $out  = "    <logitem>\n";
                $out .= "      " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

                $out .= $this->writeTimestamp( $row->log_timestamp );

                if( $row->log_deleted & LogPage::DELETED_USER ) {
                        $out .= "      " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
                } else {
                        $out .= $this->writeContributor( $row->log_user, $row->user_name );
                }

                if( $row->log_deleted & LogPage::DELETED_COMMENT ) {
                        $out .= "      " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
                } elseif( $row->log_comment != '' ) {
                        $out .= "      " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
                }

                $out .= "      " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
                $out .= "      " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

                if( $row->log_deleted & LogPage::DELETED_ACTION ) {
                        $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
                } else {
                        $title = Title::makeTitle( $row->log_namespace, $row->log_title );
                        $out .= "      " . Xml::elementClean( 'logtitle', null, $title->getPrefixedText() ) . "\n";
                        $out .= "      " . Xml::elementClean( 'params',
                                array( 'xml:space' => 'preserve' ),
                                strval( $row->log_params ) ) . "\n";
                }

                $out .= "    </logitem>\n";

                wfProfileOut( $fname );
                return $out;
        }

        /**
         * Renders a <timestamp> element in ISO 8601 format.
         *
         * @param $timestamp mixed: any value accepted by wfTimestamp()
         * @return string
         */
        function writeTimestamp( $timestamp ) {
                $ts = wfTimestamp( TS_ISO_8601, $timestamp );
                return "      " . Xml::element( 'timestamp', null, $ts ) . "\n";
        }

        /**
         * Renders a <contributor> element: <username> plus <id> when $id is
         * non-zero, otherwise a single <ip> element holding $text.
         *
         * @param $id Int: user id, 0 for an anonymous contributor
         * @param $text String: user name or IP address
         * @return string
         */
        function writeContributor( $id, $text ) {
                $out = "      <contributor>\n";
                if( $id ) {
                        $out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
                        $out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
                } else {
                        $out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
                }
                $out .= "      </contributor>\n";
                return $out;
        }

        /**
         * Renders <upload> records for a page in the image namespace:
         * the file's history entries in reversed order, followed by the
         * current version. Returns '' for any other page or when the file
         * cannot be found.
         *
         * Warning! This data is potentially inconsistent. :(
         *
         * @param $row object
         * @return string
         */
        function writeUploads( $row ) {
                if( $row->page_namespace == NS_IMAGE ) {
                        $img = wfFindFile( $row->page_title );
                        if( $img ) {
                                $out = '';
                                foreach( array_reverse( $img->getHistory() ) as $ver ) {
                                        $out .= $this->writeUpload( $ver );
                                }
                                $out .= $this->writeUpload( $img );
                                return $out;
                        }
                }
                return '';
        }

        /**
         * Renders a single <upload> record for one file version.
         *
         * @param $file object providing getTimestamp(), getUser(),
         *              getDescription(), getName(), getFullUrl() and getSize()
         * @return string
         */
        function writeUpload( $file ) {
                return "    <upload>\n" .
                        $this->writeTimestamp( $file->getTimestamp() ) .
                        $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
                        "      " . Xml::elementClean( 'comment', null, $file->getDescription() ) . "\n" .
                        "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
                        "      " . Xml::element( 'src', null, $file->getFullUrl() ) . "\n" .
                        "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
                        "    </upload>\n";
        }

}
622
623
624 /**
625  * Base class for output stream; prints to stdout or buffer or wherever.
626  * @ingroup Dump
627  */
class DumpOutput {
        /**
         * Receives the XML that opens the dump stream.
         */
        public function writeOpenStream( $string ) {
                $this->write( $string );
        }

        /**
         * Receives the XML that closes the dump stream.
         */
        public function writeCloseStream( $string ) {
                $this->write( $string );
        }

        /**
         * Receives the XML that opens a page; the row itself is not used here.
         */
        public function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }

        /**
         * Receives the XML that closes a page.
         */
        public function writeClosePage( $string ) {
                $this->write( $string );
        }

        /**
         * Receives the XML for one revision; the row itself is not used here.
         */
        public function writeRevision( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Receives the XML for one log item; the row itself is not used here.
         */
        public function writeLogItem( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Emits the string on standard output.
         * Override to write to a different stream type.
         */
        public function write( $string ) {
                echo $string;
        }
}
661
662 /**
663  * Stream outputter to send data to a file.
664  * @ingroup Dump
665  */
class DumpFileOutput extends DumpOutput {
        /** Stream handle that write() sends its data to */
        public $handle;

        /**
         * Opens $file for writing in text mode ("wt") and keeps the handle.
         * Named after the class so that it acts as a PHP 4 style constructor.
         */
        function DumpFileOutput( $file ) {
                $this->handle = fopen( $file, "wt" );
        }

        /**
         * Writes the string to the open handle instead of standard output.
         */
        function write( $string ) {
                fwrite( $this->handle, $string );
        }
}
677
/**
 * Stream outputter to send data to a file via some filter program.
 * Even if compression is available in a library, using a separate
 * program can allow us to make use of a multi-processor system.
 * @ingroup Dump
 */
class DumpPipeOutput extends DumpFileOutput {
	/**
	 * Start the filter program and keep a write pipe to its input.
	 *
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. It is declared before the legacy method so that
	 * __construct() is the one PHP uses.
	 *
	 * @param $command String: shell command to pipe the dump into
	 * @param $file String: optional path; when given, the command's
	 *   standard output is redirected to it (shell-escaped)
	 */
	function __construct( $command, $file = null ) {
		if( !is_null( $file ) ) {
			$command .=  " > " . wfEscapeShellArg( $file );
		}
		$this->handle = popen( $command, "w" );
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method so
	 * that existing parent::DumpPipeOutput() calls keep working.
	 * Uses self:: (not $this->) so a subclass __construct() that calls
	 * this method cannot recurse into itself.
	 *
	 * @param $command String
	 * @param $file String
	 */
	function DumpPipeOutput( $command, $file = null ) {
		self::__construct( $command, $file );
	}
}
692
/**
 * Sends dump output via the gzip compressor.
 * @ingroup Dump
 */
class DumpGZipOutput extends DumpPipeOutput {
	/**
	 * Pipe the dump through "gzip", redirecting its output to $file.
	 *
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. The parent is still reached through its class-named
	 * method, which exists on the parent as written in this file.
	 *
	 * @param $file String: path of the compressed output file
	 */
	function __construct( $file ) {
		parent::DumpPipeOutput( "gzip", $file );
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method for
	 * callers that invoke it explicitly.
	 *
	 * @param $file String
	 */
	function DumpGZipOutput( $file ) {
		self::__construct( $file );
	}
}
702
/**
 * Sends dump output via the bzip2 compressor.
 * @ingroup Dump
 */
class DumpBZip2Output extends DumpPipeOutput {
	/**
	 * Pipe the dump through "bzip2", redirecting its output to $file.
	 *
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. The parent is still reached through its class-named
	 * method, which exists on the parent as written in this file.
	 *
	 * @param $file String: path of the compressed output file
	 */
	function __construct( $file ) {
		parent::DumpPipeOutput( "bzip2", $file );
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method for
	 * callers that invoke it explicitly.
	 *
	 * @param $file String
	 */
	function DumpBZip2Output( $file ) {
		self::__construct( $file );
	}
}
712
/**
 * Sends dump output via the p7zip compressor.
 * @ingroup Dump
 */
class Dump7ZipOutput extends DumpPipeOutput {
	/**
	 * Pipe the dump into "7za", which reads the data from standard input
	 * (-si) and adds it to the archive named by $file.
	 *
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. The parent is still reached through its class-named
	 * method, which exists on the parent as written in this file.
	 *
	 * @param $file String: path of the archive to create
	 */
	function __construct( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Silence p7zip's console chatter by sending stdout and stderr to
		// the null device. Unfortunately this hides real error messages too.
		$command .= ' >' . wfGetNull() . ' 2>&1';
		// No file argument for the parent: 7za writes the archive itself.
		parent::DumpPipeOutput( $command );
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method for
	 * callers that invoke it explicitly.
	 *
	 * @param $file String
	 */
	function Dump7ZipOutput( $file ) {
		self::__construct( $file );
	}
}
726
727
728
/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do.
 *
 * Sits in front of another sink and forwards events to it. Page-level
 * events (open page, revisions, close page) are forwarded only for pages
 * accepted by pass().
 * @ingroup Dump
 */
class DumpFilter {
	/** Downstream sink that receives whatever this filter lets through. */
	var $sink;

	/**
	 * Whether the page currently being written was accepted by pass().
	 * Declared with a default so closing or writing a revision before any
	 * page has been opened is a quiet no-op instead of an undefined
	 * property read.
	 */
	var $sendingThisPage = false;

	/**
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. It is declared before the legacy method so that
	 * __construct() is the one PHP uses.
	 *
	 * The sink is taken and stored by reference, exactly as before, so
	 * the property stays bound to the caller's variable.
	 *
	 * @param $sink Object: sink to forward output to
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method so
	 * that existing parent::DumpFilter() calls keep working.
	 * Uses self:: (not $this->) so a subclass __construct() that calls
	 * this method cannot recurse into itself.
	 *
	 * @param $sink Object: sink to forward output to
	 */
	function DumpFilter( &$sink ) {
		self::__construct( $sink );
	}

	/**
	 * Forward the stream header unconditionally.
	 * @param $string String
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * Forward the stream footer unconditionally.
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Decide whether this page passes the filter and, if so, forward its
	 * opening text. The decision is remembered for the page's revisions
	 * and its closing text.
	 *
	 * @param $page Mixed: page data handed to pass()
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		// The second argument is not declared by pass() in this class and
		// is ignored by it; it is still passed, as before, in case an
		// overriding pass() accepts it.
		$this->sendingThisPage = $this->pass( $page, $string );
		if( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * Forward the page's closing text if the page was accepted, then
	 * clear the per-page flag.
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		if( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * Forward a revision if the current page was accepted.
	 * @param $rev Mixed: revision data
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		if( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forward a log item; log items are not subject to page filtering.
	 *
	 * NOTE(review): this calls the sink's writeRevision(), not
	 * writeLogItem(). Kept as-is because not every sink type is guaranteed
	 * to implement writeLogItem(); confirm before changing.
	 *
	 * @param $rev Mixed: log data
	 * @param $string String
	 */
	function writeLogItem( $rev, $string ) {
		$this->sink->writeRevision( $rev, $string );
	}

	/**
	 * Override for page-based filter types.
	 * The base implementation accepts every page.
	 * @param $page Mixed: page data
	 * @return bool
	 */
	function pass( $page ) {
		return true;
	}
}
780
/**
 * Dump output filter that drops every page living in a talk namespace.
 * @ingroup Dump
 */
class DumpNotalkFilter extends DumpFilter {
	/**
	 * Accept a page only when its namespace is not a talk namespace.
	 * @param $page Object: page data carrying a page_namespace field
	 * @return bool
	 */
	function pass( $page ) {
		$isTalkPage = MWNamespace::isTalk( $page->page_namespace );
		return !$isTalkPage;
	}
}
790
/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 * @ingroup Dump
 */
class DumpNamespaceFilter extends DumpFilter {
	/** True when the listed namespaces are to be excluded, not included. */
	var $invert = false;

	/** Set of selected namespace numbers, stored as array keys. */
	var $namespaces = array();

	/**
	 * Parse the namespace selection.
	 *
	 * $param is a comma-separated list whose entries are either one of
	 * the NS_* constant names listed below or a numeric namespace id.
	 * A leading "!" inverts the filter, so the listed namespaces are
	 * excluded instead of included.
	 *
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. The parent is still reached through its class-named
	 * method, which exists on the parent as written in this file.
	 *
	 * @param $sink Object: sink to forward accepted pages to
	 * @param $param String: namespace list, optionally prefixed with "!"
	 * @throws MWException when an entry is neither a known name nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::DumpFilter( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// substr() replaces the old $param{0} curly-brace offset, which is
		// deprecated in PHP 7.4 and a parse error in PHP 8.0; it also copes
		// with an empty string without raising a notice.
		if( substr( $param, 0, 1 ) == '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method for
	 * callers that invoke it explicitly.
	 *
	 * @param $sink Object
	 * @param $param String
	 */
	function DumpNamespaceFilter( &$sink, $param ) {
		self::__construct( $sink, $param );
	}

	/**
	 * Accept the page when its namespace is in the selected set, or when
	 * it is not in the set and the filter is inverted.
	 * @param $page Object: page data carrying a page_namespace field
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return $this->invert xor $match;
	}
}
846
847
/**
 * Dump output filter that keeps, for each page, only the revision whose
 * id equals the page's page_latest. Page output is held back until the
 * page is closed, so pages without a matching revision emit nothing.
 * @ingroup Dump
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	/**
	 * Remember the page and its opening text; nothing is forwarded yet.
	 * @param $page Object: page data carrying a page_latest field
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		$this->pageString = $string;
		$this->page = $page;
	}

	/**
	 * Flush the buffered page (open, latest revision, close) to the sink
	 * if a latest revision was captured, then clear the buffered state.
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		$haveLatest = (bool)$this->rev;
		if( $haveLatest ) {
			$target = $this->sink;
			$target->writeOpenPage( $this->page, $this->pageString );
			$target->writeRevision( $this->rev, $this->revString );
			$target->writeClosePage( $string );
		}
		// Reset everything held for this page, whether or not it was sent.
		$this->pageString = null;
		$this->page = null;
		$this->revString = null;
		$this->rev = null;
	}

	/**
	 * Keep the revision only when it is the page's latest one.
	 * @param $rev Object: revision data carrying a rev_id field
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		if( $rev->rev_id != $this->page->page_latest ) {
			return;
		}
		$this->rev = $rev;
		$this->revString = $string;
	}
}
879
/**
 * Fan-out writer: forwards every dump event to each sink in a list, so
 * one export can feed several outputs at once.
 * @ingroup Dump
 */
class DumpMultiWriter {
	/** Array of sink objects that receive every event. */
	var $sinks = array();

	/** Number of sinks; kept for code that reads it. */
	var $count = 0;

	/**
	 * Declared as __construct() because a method named after its class is
	 * deprecated as a constructor in PHP 7.0 and is not a constructor at
	 * all in PHP 8.0. It is declared before the legacy method so that
	 * __construct() is the one PHP uses.
	 *
	 * @param $sinks Array of sink objects
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * Legacy PHP 4-style constructor name, kept as an ordinary method for
	 * callers that invoke it explicitly.
	 *
	 * @param $sinks Array of sink objects
	 */
	function DumpMultiWriter( $sinks ) {
		self::__construct( $sinks );
	}

	// The methods below iterate with foreach rather than a 0..count-1
	// index loop, so arrays with string or non-contiguous keys work too.

	/**
	 * @param $string String
	 */
	function writeOpenStream( $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string String
	 */
	function writeCloseStream( $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page Mixed: page data
	 * @param $string String
	 */
	function writeOpenPage( $page, $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string String
	 */
	function writeClosePage( $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev Mixed: revision data
	 * @param $string String
	 */
	function writeRevision( $rev, $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forward a log item to every sink. The other sink classes in this
	 * file provide writeLogItem(); without it here, a log dump routed
	 * through a multi-writer would call an undefined method.
	 *
	 * @param $rev Mixed: log data
	 * @param $string String
	 */
	function writeLogItem( $rev, $string ) {
		foreach( $this->sinks as $sink ) {
			$sink->writeLogItem( $rev, $string );
		}
	}
}
920
/**
 * Make a string safe to embed in the XML dump.
 *
 * The page may contain old data which has not been properly normalized.
 * Invalid UTF-8 sequences or forbidden control characters would make the
 * XML output invalid, so the text is run through UtfNormal::cleanUp()
 * first and then escaped with htmlspecialchars().
 *
 * @param $string String: raw text
 * @return String: cleaned and escaped text
 */
function xmlsafe( $string ) {
	// __FUNCTION__ is 'xmlsafe' here, the same profiling key as before.
	wfProfileIn( __FUNCTION__ );

	$normalized = UtfNormal::cleanUp( $string );
	$escaped = htmlspecialchars( $normalized );

	wfProfileOut( __FUNCTION__ );
	return $escaped;
}