scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/specials/SpecialExport.php
MediaWiki 1.17.0
[autoinstalls/mediawiki.git] / includes / specials / SpecialExport.php
1 <?php
2 /**
3  * Implements Special:Export
4  *
5  * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  * http://www.gnu.org/copyleft/gpl.html
21  *
22  * @file
23  * @ingroup SpecialPage
24  */
25
26 /**
27  * A special page that allows users to export pages in a XML file
28  *
29  * @ingroup SpecialPage
30  */
class SpecialExport extends SpecialPage {

	/**
	 * Last-resort upper bound on how many levels of page links are followed
	 * during an export. It is applied to every user, regardless of
	 * $wgExportMaxLinkDepth and of the 'override-export-depth' right.
	 */
	const MAX_LINK_DEPTH = 5;

	private $curonly, $doExport, $pageLinkDepth, $templates;
	private $images;

	/**
	 * Register this special page under the name 'Export'.
	 */
	public function __construct() {
		parent::__construct( 'Export' );
	}

	/**
	 * Entry point: either stream an XML export or show the export form.
	 *
	 * Request parameters read here: 'templates', 'images', 'pagelink-depth',
	 * 'addcat', 'catname', 'addns', 'nsindex', 'pages', 'curonly', 'offset',
	 * 'limit', 'dir', 'history', 'listauthors' and 'wpDownload'.
	 *
	 * The request is handled in one of four modes:
	 *  - 'addcat': append the members of a category to the page list and
	 *    redisplay the form;
	 *  - 'addns' (only if $wgExportFromNamespaces): append the pages of a
	 *    namespace to the page list and redisplay the form;
	 *  - POST without a subpage: export the listed pages using the history
	 *    options submitted with the form;
	 *  - anything else (GET, or a subpage was given): export $par or the
	 *    'pages' parameter, current revision only unless 'history' is set.
	 *
	 * @param $par String: subpage part of the title, used as the default page
	 *             list for non-POST requests
	 */
	public function execute( $par ) {
		global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
		global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
		global $wgExportFromNamespaces, $wgUser;

		$this->setHeaders();
		$this->outputHeader();

		// Set some variables
		$this->curonly = true;
		$this->doExport = false;
		$this->templates = $wgRequest->getCheck( 'templates' );
		$this->images = $wgRequest->getCheck( 'images' ); // Doesn't do anything yet
		$this->pageLinkDepth = $this->validateLinkDepth(
			$wgRequest->getIntOrNull( 'pagelink-depth' )
		);
		$nsindex = '';
		// Keep $history defined on every path; the branches that actually
		// export overwrite it below.
		$history = WikiExporter::CURRENT;

		if ( $wgRequest->getCheck( 'addcat' ) ) {
			$page = $wgRequest->getText( 'pages' );
			$catname = $wgRequest->getText( 'catname' );

			if ( $catname !== '' && $catname !== null && $catname !== false ) {
				$t = Title::makeTitleSafe( NS_MAIN, $catname );
				if ( $t ) {
					/**
					 * @todo Fixme: this can lead to hitting memory limit for very large
					 * categories. Ideally we would do the lookup synchronously
					 * during the export in a single query.
					 */
					$catpages = $this->getPagesFromCategory( $t );
					if ( $catpages ) {
						$page .= "\n" . implode( "\n", $catpages );
					}
				}
			}
		}
		else if( $wgRequest->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
			$page = $wgRequest->getText( 'pages' );
			$nsindex = $wgRequest->getText( 'nsindex', '' );

			if ( strval( $nsindex ) !== '' ) {
				/**
				 * Same implementation as above, so same @todo
				 */
				$nspages = $this->getPagesFromNamespace( $nsindex );
				if ( $nspages ) {
					$page .= "\n" . implode( "\n", $nspages );
				}
			}
		}
		else if( $wgRequest->wasPosted() && $par == '' ) {
			$page = $wgRequest->getText( 'pages' );
			$this->curonly = $wgRequest->getCheck( 'curonly' );
			$rawOffset = $wgRequest->getVal( 'offset' );

			if( $rawOffset ) {
				$offset = wfTimestamp( TS_MW, $rawOffset );
			} else {
				$offset = null;
			}

			$limit = $wgRequest->getInt( 'limit' );
			$dir = $wgRequest->getVal( 'dir' );
			// Defaults used when history is exported without custom options
			$history = array(
				'dir' => 'asc',
				'offset' => false,
				'limit' => $wgExportMaxHistory,
			);
			$historyCheck = $wgRequest->getCheck( 'history' );

			if ( $this->curonly ) {
				$history = WikiExporter::CURRENT;
			} elseif ( !$historyCheck ) {
				// Custom limit/offset/dir are only honoured when 'history'
				// is not set; a limit is accepted only if it is positive and
				// below $wgExportMaxHistory (or that setting is 0).
				if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
					$history['limit'] = $limit;
				}
				if ( !is_null( $offset ) ) {
					$history['offset'] = $offset;
				}
				if ( strtolower( $dir ) == 'desc' ) {
					$history['dir'] = 'desc';
				}
			}

			if( $page != '' ) {
				$this->doExport = true;
			}
		} else {
			// Default to current-only for GET requests.
			$page = $wgRequest->getText( 'pages', $par );
			$historyCheck = $wgRequest->getCheck( 'history' );

			if( $historyCheck ) {
				$history = WikiExporter::FULL;
			} else {
				$history = WikiExporter::CURRENT;
			}

			if( $page != '' ) {
				$this->doExport = true;
			}
		}

		if( !$wgExportAllowHistory ) {
			// Override
			$history = WikiExporter::CURRENT;
		}

		$list_authors = $wgRequest->getCheck( 'listauthors' );
		if ( !$this->curonly || !$wgExportAllowListContributors ) {
			$list_authors = false;
		}

		if ( $this->doExport ) {
			$wgOut->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			$wgRequest->response()->header( "Content-type: application/xml; charset=utf-8" );

			if( $wgRequest->getCheck( 'wpDownload' ) ) {
				// Provide a sane filename suggestion
				$filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
				$wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
			}

			$this->doExport( $page, $history, $list_authors );

			return;
		}

		// Not exporting: build and show the form, pre-filled with the
		// (possibly extended) page list.
		$wgOut->addWikiMsg( 'exporttext' );

		$form = Xml::openElement( 'form', array( 'method' => 'post',
			'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
		$form .= Xml::inputLabel( wfMsg( 'export-addcattext' ), 'catname', 'catname', 40 ) . '&#160;';
		$form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';

		if ( $wgExportFromNamespaces ) {
			$form .= Xml::namespaceSelector( $nsindex, null, 'nsindex', wfMsg( 'export-addnstext' ) ) . '&#160;';
			$form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
		}

		$form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
		$form .= '<br />';

		if( $wgExportAllowHistory ) {
			$form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
		} else {
			$wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
		}

		$form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';

		if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
			$form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
		}
		// Enable this when we can do something useful exporting/importing image information. :)
		//$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
		$form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';

		$form .= Xml::submitButton( wfMsg( 'export-submit' ), $wgUser->getSkin()->tooltipAndAccessKeyAttribs( 'export' ) );
		$form .= Xml::closeElement( 'form' );

		$wgOut->addHTML( $form );
	}

	/**
	 * Whether the current user holds the 'override-export-depth' right.
	 *
	 * @return Boolean
	 */
	private function userCanOverrideExportDepth() {
		global $wgUser;
		return $wgUser->isAllowed( 'override-export-depth' );
	}

	/**
	 * Do the actual page exporting
	 *
	 * @param $page String: user input on what page(s) to export
	 * @param $history Mixed: one of the WikiExporter history export constants
	 * @param $list_authors Boolean: Whether to add distinct author list (when
	 *                      not returning full history)
	 */
	private function doExport( $page, $history, $list_authors ) {
		$pageSet = array(); // Inverted index of all pages to look up

		// Split up and normalize input
		foreach( explode( "\n", $page ) as $pageName ) {
			$pageName = trim( $pageName );
			$title = Title::newFromText( $pageName );
			if( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
				// Only record each page once!
				$pageSet[$title->getPrefixedText()] = true;
			}
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$linkDepth = $this->pageLinkDepth;
		if( $linkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
		}

		/*
		 // Enable this when we can do something useful exporting/importing image information. :)
		 if( $this->images ) {
		 $pageSet = $this->getImages( $inputPages, $pageSet );
		 }
		 */

		$pages = array_keys( $pageSet );

		// Normalize titles to the same format and remove dupes, see bug 17374
		foreach( $pages as $k => $v ) {
			$pages[$k] = str_replace( " ", "_", $v );
		}

		$pages = array_unique( $pages );

		/* Ok, let's get to it... */
		if( $history == WikiExporter::CURRENT ) {
			$lb = false;
			$db = wfGetDB( DB_SLAVE );
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection( DB_SLAVE );
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit(0);
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		// A separate loop variable keeps the $page parameter intact.
		foreach( $pages as $exportName ) {
			/*
			 if( $wgExportMaxHistory && !$this->curonly ) {
			 $title = Title::newFromText( $exportName );
			 if( $title ) {
			 $count = Revision::countByTitle( $db, $title );
			 if( $count > $wgExportMaxHistory ) {
			 wfDebug( __FUNCTION__ .
			 ": Skipped $exportName, $count revisions too big\n" );
			 continue;
			 }
			 }
			 }*/
			#Bug 8824: Only export pages the user can read
			$title = Title::newFromText( $exportName );
			if( is_null( $title ) ) {
				continue; #TODO: perhaps output an <error> tag or something.
			}
			if( !$title->userCanRead() ) {
				continue; #TODO: perhaps output an <error> tag or something.
			}

			$exporter->pageByTitle( $title );
		}

		$exporter->closeStream();

		// Only the dedicated load balancer created for streaming is closed.
		if( $lb ) {
			$lb->closeAll();
		}
	}

	/**
	 * List the pages that are members of a category (at most 5000).
	 *
	 * @param $title Title: its DB key is matched against cl_to
	 * @return array list of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromCategory( $title ) {
		$name = $title->getDBkey();

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			array( 'page', 'categorylinks' ),
			array( 'page_namespace', 'page_title' ),
			array( 'cl_from=page_id', 'cl_to' => $name ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->prefixedNamesFromRows( $res );
	}

	/**
	 * List the pages that live in a namespace (at most 5000).
	 *
	 * @param $nsindex Mixed: namespace index as received from the request
	 * @return array list of page names, namespace-prefixed where applicable
	 */
	private function getPagesFromNamespace( $nsindex ) {
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_namespace' => $nsindex ),
			__METHOD__,
			array( 'LIMIT' => '5000' )
		);

		return $this->prefixedNamesFromRows( $res );
	}

	/**
	 * Turn rows carrying page_namespace/page_title into page names.
	 *
	 * Rows with a non-zero namespace get the content-language namespace
	 * text and a colon prepended; other rows yield the bare page_title.
	 *
	 * @param $res Mixed: iterable query result with page_namespace and
	 *             page_title fields
	 * @return array list of page names
	 */
	private function prefixedNamesFromRows( $res ) {
		global $wgContLang;

		$pages = array();

		foreach ( $res as $row ) {
			$n = $row->page_title;

			if ( $row->page_namespace ) {
				$ns = $wgContLang->getNsText( $row->page_namespace );
				$n = $ns . ':' . $n;
			}

			$pages[] = $n;
		}

		return $pages;
	}

	/**
	 * Expand a list of pages to include templates used in those pages.
	 * @param $inputPages array, list of titles to look up
	 * @param $pageSet array, associative array indexed by titles for output
	 * @return array associative array index by titles
	 */
	private function getTemplates( $inputPages, $pageSet ) {
		return $this->getLinks( $inputPages, $pageSet,
			'templatelinks',
			array( 'tl_namespace AS namespace', 'tl_title AS title' ),
			array( 'page_id=tl_from' )
		);
	}

	/**
	 * Validate link depth setting, if available.
	 *
	 * Missing, zero or negative depths yield 0. Users without the
	 * 'override-export-depth' right are limited to $wgExportMaxLinkDepth.
	 * Every result is additionally capped at self::MAX_LINK_DEPTH, so a
	 * configured maximum above that cap cannot bypass it.
	 *
	 * @param $depth Mixed: requested depth (integer or null)
	 * @return Integer: depth to use, between 0 and self::MAX_LINK_DEPTH
	 */
	private function validateLinkDepth( $depth ) {
		global $wgExportMaxLinkDepth;

		$depth = intval( $depth );
		if ( $depth <= 0 ) {
			return 0;
		}

		if ( !$this->userCanOverrideExportDepth() ) {
			$depth = min( $depth, intval( $wgExportMaxLinkDepth ) );
		}

		/*
		 * There's a HARD CODED limit on the levels of recursion here to
		 * prevent a crazy-big export from being done by someone setting the
		 * depth number too high. In other words, last resort safety net.
		 * max() guards against a negative configured maximum.
		 */
		return max( 0, min( $depth, self::MAX_LINK_DEPTH ) );
	}

	/**
	 * Expand a list of pages to include pages linked to from that page.
	 *
	 * Runs $depth rounds. Each round looks up the outgoing links of the
	 * pages that have not been looked up in an earlier round, then uses the
	 * whole accumulated set as the candidates for the next round.
	 *
	 * @param $inputPages array, list of titles to start from
	 * @param $pageSet array, associative array indexed by titles for output
	 * @param $depth Integer: number of link levels to follow
	 * @return array associative array index by titles
	 */
	private function getPageLinks( $inputPages, $pageSet, $depth ) {
		$expanded = array(); // titles whose links were already fetched

		for ( ; $depth > 0; --$depth ) {
			// Re-querying an already expanded page cannot add anything new,
			// so only the not-yet-expanded pages hit the database.
			$pending = array_diff( $inputPages, $expanded );

			if ( $pending ) {
				$pageSet = $this->getLinks(
					$pending, $pageSet, 'pagelinks',
					array( 'pl_namespace AS namespace', 'pl_title AS title' ),
					array( 'page_id=pl_from' )
				);
				$expanded = array_merge( $expanded, $pending );
			}

			$inputPages = array_keys( $pageSet );
		}

		return $pageSet;
	}

	/**
	 * Expand a list of pages to include images used in those pages.
	 *
	 * Not called by any active code in this class at present; the only
	 * call site is commented out in doExport().
	 *
	 * @param $inputPages array, list of titles to look up
	 * @param $pageSet array, associative array indexed by titles for output
	 *
	 * @return array associative array index by titles
	 */
	private function getImages( $inputPages, $pageSet ) {
		return $this->getLinks(
			$inputPages,
			$pageSet,
			'imagelinks',
			array( NS_FILE . ' AS namespace', 'il_to AS title' ),
			array( 'page_id=il_from' )
		);
	}

	/**
	 * Expand a list of pages to include items used in those pages.
	 *
	 * For every input page that parses as a title, the page itself and each
	 * target returned by the link-table query are recorded in the set.
	 *
	 * @param $inputPages array, list of titles to look up
	 * @param $pageSet array, associative array indexed by titles for output
	 * @param $table String: link table to join against 'page'
	 * @param $fields array, select fields; must expose the target as
	 *                'namespace' and 'title'
	 * @param $join array, join condition(s) between 'page' and $table
	 * @return array associative array index by titles
	 */
	private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
		$dbr = wfGetDB( DB_SLAVE );

		foreach( $inputPages as $page ) {
			$title = Title::newFromText( $page );

			if( $title ) {
				$pageSet[$title->getPrefixedText()] = true;
				/// @todo Fixme: May or may not be more efficient to batch these
				///        by namespace when given multiple input pages.
				$result = $dbr->select(
					array( 'page', $table ),
					$fields,
					array_merge(
						$join,
						array(
							'page_namespace' => $title->getNamespace(),
							'page_title' => $title->getDBkey()
						)
					),
					__METHOD__
				);

				foreach( $result as $row ) {
					$template = Title::makeTitle( $row->namespace, $row->title );
					$pageSet[$template->getPrefixedText()] = true;
				}
			}
		}

		return $pageSet;
	}

}