scripts.mit.edu Git - autoinstalls/mediawiki.git/blobdiff - includes/specials/SpecialExport.php
MediaWiki 1.15.0
[autoinstalls/mediawiki.git] / includes / specials / SpecialExport.php
index 898b5a7851cfb869babad32f6cd2f15392a36c57..8bf16a71592ad944ec91e02792ccf750d508f7de 100644 (file)
  * @ingroup SpecialPage
  */
 
-function wfExportGetPagesFromCategory( $title ) {
-       global $wgContLang;
-
-       $name = $title->getDBkey();
-
-       $dbr = wfGetDB( DB_SLAVE );
-
-       list( $page, $categorylinks ) = $dbr->tableNamesN( 'page', 'categorylinks' );
-       $sql = "SELECT page_namespace, page_title FROM $page " .
-               "JOIN $categorylinks ON cl_from = page_id " .
-               "WHERE cl_to = " . $dbr->addQuotes( $name );
-
-       $pages = array();
-       $res = $dbr->query( $sql, 'wfExportGetPagesFromCategory' );
-       while ( $row = $dbr->fetchObject( $res ) ) {
-               $n = $row->page_title;
-               if ($row->page_namespace) {
-                       $ns = $wgContLang->getNsText( $row->page_namespace );
-                       $n = $ns . ':' . $n;
-               }
-
-               $pages[] = $n;
+class SpecialExport extends SpecialPage {
+       
+       private $curonly, $doExport, $pageLinkDepth, $templates;
+       private $images;
+       
+       public function __construct() { // Sets up the special page by delegating to the parent constructor.
+               parent::__construct( 'Export' ); // 'Export' is the name handed to the parent class
        }
-       $dbr->freeResult($res);
-
-       return $pages;
-}
-
-/**
- * Expand a list of pages to include templates used in those pages.
- * @param $inputPages array, list of titles to look up
- * @param $pageSet array, associative array indexed by titles for output
- * @return array associative array index by titles
- */
-function wfExportGetTemplates( $inputPages, $pageSet ) {
-       return wfExportGetLinks( $inputPages, $pageSet,
-               'templatelinks',
-               array( 'tl_namespace AS namespace', 'tl_title AS title' ),
-               array( 'page_id=tl_from' ) );
-}
-
-/**
- * Expand a list of pages to include images used in those pages.
- * @param $inputPages array, list of titles to look up
- * @param $pageSet array, associative array indexed by titles for output
- * @return array associative array index by titles
- */
-function wfExportGetImages( $inputPages, $pageSet ) {
-       return wfExportGetLinks( $inputPages, $pageSet,
-               'imagelinks',
-               array( NS_FILE . ' AS namespace', 'il_to AS title' ),
-               array( 'page_id=il_from' ) );
-}
-
-/**
- * Expand a list of pages to include items used in those pages.
- * @private
- */
-function wfExportGetLinks( $inputPages, $pageSet, $table, $fields, $join ) {
-       $dbr = wfGetDB( DB_SLAVE );
-       foreach( $inputPages as $page ) {
-               $title = Title::newFromText( $page );
-               if( $title ) {
-                       $pageSet[$title->getPrefixedText()] = true;
-                       /// @fixme May or may not be more efficient to batch these
-                       ///        by namespace when given multiple input pages.
-                       $result = $dbr->select(
-                               array( 'page', $table ),
-                               $fields,
-                               array_merge( $join,
-                                       array(
-                                               'page_namespace' => $title->getNamespace(),
-                                               'page_title' => $title->getDBKey() ) ),
-                               __METHOD__ );
-                       foreach( $result as $row ) {
-                               $template = Title::makeTitle( $row->namespace, $row->title );
-                               $pageSet[$template->getPrefixedText()] = true;
+       
+       public function execute( $par ) {
+               global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
+               global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
+               global $wgExportFromNamespaces;
+               
+               $this->setHeaders();
+               $this->outputHeader();
+               
+               // Set some variables
+               $this->curonly = true;
+               $this->doExport = false;
+               $this->templates = $wgRequest->getCheck( 'templates' );
+               $this->images = $wgRequest->getCheck( 'images' ); // Doesn't do anything yet
+               $this->pageLinkDepth = $this->validateLinkDepth(
+                                                                                                               $wgRequest->getIntOrNull( 'pagelink-depth' ) );
+               
+               if ( $wgRequest->getCheck( 'addcat' ) ) {
+                       $page = $wgRequest->getText( 'pages' );
+                       $catname = $wgRequest->getText( 'catname' );
+                       
+                       if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
+                               $t = Title::makeTitleSafe( NS_MAIN, $catname );
+                               if ( $t ) {
+                                       /**
+                                        * @fixme This can lead to hitting memory limit for very large
+                                        * categories. Ideally we would do the lookup synchronously
+                                        * during the export in a single query.
+                                        */
+                                       $catpages = $this->getPagesFromCategory( $t );
+                                       if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
+                               }
                        }
                }
-       }
-       return $pageSet;
-}
-
-/**
- * Callback function to remove empty strings from the pages array.
- */
-function wfFilterPage( $page ) {
-       return $page !== '' && $page !== null;
-}
-
-/**
- *
- */
-function wfSpecialExport( $page = '' ) {
-       global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
-       global $wgExportAllowHistory, $wgExportMaxHistory;
-
-       $curonly = true;
-       $doexport = false;
-
-       if ( $wgRequest->getCheck( 'addcat' ) ) {
-               $page = $wgRequest->getText( 'pages' );
-               $catname = $wgRequest->getText( 'catname' );
-
-               if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
-                       $t = Title::makeTitleSafe( NS_MAIN, $catname );
-                       if ( $t ) {
+               else if( $wgRequest->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
+                       $page = $wgRequest->getText( 'pages' );
+                       $nsindex = $wgRequest->getText( 'nsindex' );
+                       
+                       if ( $nsindex !== '' && $nsindex !== NULL && $nsindex !== false ) {
                                /**
-                                * @fixme This can lead to hitting memory limit for very large
-                                * categories. Ideally we would do the lookup synchronously
-                                * during the export in a single query.
+                                * Same implementation as above, so same @fixme
                                 */
-                               $catpages = wfExportGetPagesFromCategory( $t );
-                               if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
-                       }
+                               $nspages = $this->getPagesFromNamespace( $nsindex );
+                               if ( $nspages ) $page .= "\n" . implode( "\n", $nspages );
+                       }       
                }
-       }
-       else if( $wgRequest->wasPosted() && $page == '' ) {
-               $page = $wgRequest->getText( 'pages' );
-               $curonly = $wgRequest->getCheck( 'curonly' );
-               $rawOffset = $wgRequest->getVal( 'offset' );
-               if( $rawOffset ) {
-                       $offset = wfTimestamp( TS_MW, $rawOffset );
+               else if( $wgRequest->wasPosted() && $par == '' ) {
+                       $page = $wgRequest->getText( 'pages' );
+                       $this->curonly = $wgRequest->getCheck( 'curonly' );
+                       $rawOffset = $wgRequest->getVal( 'offset' );
+                       if( $rawOffset ) {
+                               $offset = wfTimestamp( TS_MW, $rawOffset );
+                       } else {
+                               $offset = null;
+                       }
+                       $limit = $wgRequest->getInt( 'limit' );
+                       $dir = $wgRequest->getVal( 'dir' );
+                       $history = array(
+                                                        'dir' => 'asc',
+                                                        'offset' => false,
+                                                        'limit' => $wgExportMaxHistory,
+                                                        );
+                       $historyCheck = $wgRequest->getCheck( 'history' );
+                       if ( $this->curonly ) {
+                               $history = WikiExporter::CURRENT;
+                       } elseif ( !$historyCheck ) {
+                               if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
+                                       $history['limit'] = $limit;
+                               }
+                               if ( !is_null( $offset ) ) {
+                                       $history['offset'] = $offset;
+                               }
+                               if ( strtolower( $dir ) == 'desc' ) {
+                                       $history['dir'] = 'desc';
+                               }
+                       }
+                       
+                       if( $page != '' ) $this->doExport = true;
                } else {
-                       $offset = null;
+                       // Default to current-only for GET requests
+                       $page = $wgRequest->getText( 'pages', $par );
+                       $historyCheck = $wgRequest->getCheck( 'history' );
+                       if( $historyCheck ) {
+                               $history = WikiExporter::FULL;
+                       } else {
+                               $history = WikiExporter::CURRENT;
+                       }
+                       
+                       if( $page != '' ) $this->doExport = true;
                }
-               $limit = $wgRequest->getInt( 'limit' );
-               $dir = $wgRequest->getVal( 'dir' );
-               $history = array(
-                       'dir' => 'asc',
-                       'offset' => false,
-                       'limit' => $wgExportMaxHistory,
-               );
-               $historyCheck = $wgRequest->getCheck( 'history' );
-               if ( $curonly ) {
+               
+               if( !$wgExportAllowHistory ) {
+                       // Override
                        $history = WikiExporter::CURRENT;
-               } elseif ( !$historyCheck ) {
-                       if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
-                               $history['limit'] = $limit;
-                       }
-                       if ( !is_null( $offset ) ) {
-                               $history['offset'] = $offset;
-                       }
-                       if ( strtolower( $dir ) == 'desc' ) {
-                               $history['dir'] = 'desc';
+               }
+               
+               $list_authors = $wgRequest->getCheck( 'listauthors' );
+               if ( !$this->curonly || !$wgExportAllowListContributors ) $list_authors = false ;
+               
+               if ( $this->doExport ) {
+                       $wgOut->disable();
+                       // Cancel output buffering and gzipping if set
+                       // This should provide safer streaming for pages with history
+                       wfResetOutputBuffers();
+                       header( "Content-type: application/xml; charset=utf-8" );
+                       if( $wgRequest->getCheck( 'wpDownload' ) ) {
+                               // Provide a sane filename suggestion
+                               $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
+                               $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
                        }
+                       $this->doExport( $page, $history, $list_authors );
+                       return;
                }
-
-               if( $page != '' ) $doexport = true;
-       } else {
-               // Default to current-only for GET requests
-               $page = $wgRequest->getText( 'pages', $page );
-               $historyCheck = $wgRequest->getCheck( 'history' );
-               if( $historyCheck ) {
-                       $history = WikiExporter::FULL;
+               
+               $wgOut->addWikiMsg( 'exporttext' );
+               
+               $form = Xml::openElement( 'form', array( 'method' => 'post',
+                                                                                               'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
+               $form .= Xml::inputLabel( wfMsg( 'export-addcattext' )    , 'catname', 'catname', 40 ) . '&nbsp;';
+               $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
+               
+               if ( $wgExportFromNamespaces ) {
+                       $form .= Xml::namespaceSelector( '', null, 'nsindex', wfMsg( 'export-addnstext' ) ) . '&nbsp;';
+                       $form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
+               }
+               
+               $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
+               $form .= '<br />';
+               
+               if( $wgExportAllowHistory ) {
+                       $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
                } else {
-                       $history = WikiExporter::CURRENT;
+                       $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
                }
-
-               if( $page != '' ) $doexport = true;
+               $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
+               if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
+                       $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
+               }
+               // Enable this when we can do something useful exporting/importing image information. :)
+               //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
+               $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
+               
+               $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
+               $form .= Xml::closeElement( 'form' );
+               $wgOut->addHTML( $form );
        }
+       
+       private function userCanOverrideExportDepth() {
+               global $wgUser;   
 
-       if( !$wgExportAllowHistory ) {
-               // Override
-               $history = WikiExporter::CURRENT;
+               return $wgUser->isAllowed( 'override-export-depth' );
        }
-
-       $list_authors = $wgRequest->getCheck( 'listauthors' );
-       if ( !$curonly || !$wgExportAllowListContributors ) $list_authors = false ;
-
-       if ( $doexport ) {
-               $wgOut->disable();
-
-               // Cancel output buffering and gzipping if set
-               // This should provide safer streaming for pages with history
-               wfResetOutputBuffers();
-               header( "Content-type: application/xml; charset=utf-8" );
-               if( $wgRequest->getCheck( 'wpDownload' ) ) {
-                       // Provide a sane filename suggestion
-                       $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
-                       $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
-               }
-
+       
+       /**
+        * Do the actual page exporting
+        * @param string $page User input on what page(s) to export
+        * @param mixed  $history one of the WikiExporter history export constants
+        */
+       private function doExport( $page, $history, $list_authors ) {
+               global $wgExportMaxHistory;
+               
                /* Split up the input and look up linked pages */
-               $inputPages = array_filter( explode( "\n", $page ), 'wfFilterPage' );
+               $inputPages = array_filter( explode( "\n", $page ), array( $this, 'filterPage' ) );
                $pageSet = array_flip( $inputPages );
-
-               if( $wgRequest->getCheck( 'templates' ) ) {
-                       $pageSet = wfExportGetTemplates( $inputPages, $pageSet );
+               
+               if( $this->templates ) {
+                       $pageSet = $this->getTemplates( $inputPages, $pageSet );
                }
-
-               /*
-               // Enable this when we can do something useful exporting/importing image information. :)
-               if( $wgRequest->getCheck( 'images' ) ) {
-                       $pageSet = wfExportGetImages( $inputPages, $pageSet );
+               
+               if( $linkDepth = $this->pageLinkDepth ) {
+                       $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
                }
-               */
-
+               
+               /*
+                // Enable this when we can do something useful exporting/importing image information. :)
+                if( $this->images ) {
+                $pageSet = $this->getImages( $inputPages, $pageSet );
+                }
+                */
+               
                $pages = array_keys( $pageSet );
-
+               
                /* Ok, let's get to it... */
-
                if( $history == WikiExporter::CURRENT ) {
                        $lb = false;
                        $db = wfGetDB( DB_SLAVE );
@@ -238,65 +227,177 @@ function wfSpecialExport( $page = '' ) {
                        set_time_limit(0);
                        wfRestoreWarnings();
                }
-
                $exporter = new WikiExporter( $db, $history, $buffer );
-               $exporter->list_authors = $list_authors ;
+               $exporter->list_authors = $list_authors;
                $exporter->openStream();
-
                foreach( $pages as $page ) {
                        /*
-                       if( $wgExportMaxHistory && !$curonly ) {
-                               $title = Title::newFromText( $page );
-                               if( $title ) {
-                                       $count = Revision::countByTitle( $db, $title );
-                                       if( $count > $wgExportMaxHistory ) {
-                                               wfDebug( __FUNCTION__ .
-                                                       ": Skipped $page, $count revisions too big\n" );
-                                               continue;
-                                       }
-                               }
-                       }*/
-
+                        if( $wgExportMaxHistory && !$this->curonly ) {
+                        $title = Title::newFromText( $page );
+                        if( $title ) {
+                        $count = Revision::countByTitle( $db, $title );
+                        if( $count > $wgExportMaxHistory ) {
+                        wfDebug( __FUNCTION__ .
+                        ": Skipped $page, $count revisions too big\n" );
+                        continue;
+                        }
+                        }
+                        }*/
                        #Bug 8824: Only export pages the user can read
                        $title = Title::newFromText( $page );
                        if( is_null( $title ) ) continue; #TODO: perhaps output an <error> tag or something.
                        if( !$title->userCanRead() ) continue; #TODO: perhaps output an <error> tag or something.
-
+                       
                        $exporter->pageByTitle( $title );
                }
-
+               
                $exporter->closeStream();
                if( $lb ) {
                        $lb->closeAll();
                }
-               return;
        }
-
-       $self = SpecialPage::getTitleFor( 'Export' );
-       $wgOut->addHTML( wfMsgExt( 'exporttext', 'parse' ) );
-
-       $form = Xml::openElement( 'form', array( 'method' => 'post',
-               'action' => $self->getLocalUrl( 'action=submit' ) ) );
-
-       $form .= Xml::inputLabel( wfMsg( 'export-addcattext' )  , 'catname', 'catname', 40 ) . '&nbsp;';
-       $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
-
-       $form .= Xml::openElement( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ) );
-       $form .= htmlspecialchars( $page );
-       $form .= Xml::closeElement( 'textarea' );
-       $form .= '<br />';
-
-       if( $wgExportAllowHistory ) {
-               $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
-       } else {
-               $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
+       
+       
+       /**
+        * List the pages that are members of a category.
+        *
+        * @param $title Title object; its DB key is matched against cl_to
+        * @return array list of page names; pages whose namespace index is
+        *         non-zero are prefixed with the namespace text and a colon
+        */
+       private function getPagesFromCategory( $title ) {
+               global $wgContLang;
+
+               $categoryKey = $title->getDBkey();
+
+               $db = wfGetDB( DB_SLAVE );
+               // The lookup is capped at 5000 rows.
+               $rows = $db->select(
+                       array('page', 'categorylinks' ),
+                       array( 'page_namespace', 'page_title' ),
+                       array('cl_from=page_id', 'cl_to' => $categoryKey ),
+                       __METHOD__,
+                       array('LIMIT' => '5000')
+               );
+
+               $names = array();
+               while ( $row = $db->fetchObject( $rows ) ) {
+                       // Namespace 0 has no prefix; everything else gets "Nstext:".
+                       $names[] = $row->page_namespace
+                               ? $wgContLang->getNsText( $row->page_namespace ) . ':' . $row->page_title
+                               : $row->page_title;
+               }
+               $db->freeResult( $rows );
+
+               return $names;
+       }
+       
+       /**
+        * List the pages that live in a given namespace.
+        *
+        * @param $nsindex namespace index used as the page_namespace condition
+        * @return array list of page names; pages whose namespace index is
+        *         non-zero are prefixed with the namespace text and a colon
+        */
+       private function getPagesFromNamespace( $nsindex ) {
+               global $wgContLang;
+
+               $db = wfGetDB( DB_SLAVE );
+               // The lookup is capped at 5000 rows.
+               $rows = $db->select(
+                       'page',
+                       array('page_namespace', 'page_title'),
+                       array('page_namespace' => $nsindex),
+                       __METHOD__,
+                       array('LIMIT' => '5000')
+               );
+
+               $names = array();
+               while ( $row = $db->fetchObject( $rows ) ) {
+                       // Namespace 0 has no prefix; everything else gets "Nstext:".
+                       $names[] = $row->page_namespace
+                               ? $wgContLang->getNsText( $row->page_namespace ) . ':' . $row->page_title
+                               : $row->page_title;
+               }
+               $db->freeResult( $rows );
+
+               return $names;
+       }
+       /**
+        * Add the templates used by the given pages to the page set.
+        * Delegates to getLinks() with the templatelinks table.
+        * @param $inputPages array, list of titles to look up
+        * @param $pageSet array, associative array indexed by titles for output
+        * @return array associative array indexed by titles
+        */
+       private function getTemplates( $inputPages, $pageSet ) {
+               $fields = array( 'tl_namespace AS namespace', 'tl_title AS title' );
+               $joinConds = array( 'page_id=tl_from' );
+
+               return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fields, $joinConds );
+       }
+       
+       /**
+        * Validate link depth setting, if available.
+        *
+        * The requested depth is clamped in three steps: negative or missing
+        * values become 0, users who may not override the export depth are
+        * limited to $wgExportMaxLinkDepth, and the result is finally capped
+        * at the hard-coded maximum of 5.
+        *
+        * @param $depth int|null requested depth (null when none was submitted)
+        * @return int depth to use, always between 0 and 5
+        */
+       private function validateLinkDepth( $depth ) {
+               global $wgExportMaxLinkDepth;
+
+               // A missing value (null) is treated the same as 0.
+               $depth = intval( $depth );
+               if( $depth <= 0 ) {
+                       return 0;
+               }
+               if ( !$this->userCanOverrideExportDepth() ) {
+                       // Clamp instead of returning early, so that the hard limit
+                       // below still applies when the configured maximum exceeds it.
+                       $depth = min( $depth, intval( $wgExportMaxLinkDepth ) );
+               }
+               /*
+                * There's a HARD CODED limit of 5 levels of recursion here to prevent a
+                * crazy-big export from being done by someone setting the depth
+                * number too high. In other words, last resort safety net.
+                */
+               return max( 0, min( $depth, 5 ) );
+       }
+       
+       /**
+        * Expand a list of pages to include pages linked to from those pages.
+        * The expansion is repeated $depth times; each round uses the whole
+        * page set collected so far as its input.
+        * @param $inputPages array, list of titles to look up
+        * @param $pageSet array, associative array indexed by titles for output
+        * @param $depth int, number of rounds of link following
+        * @return array associative array indexed by titles
+        */
+       private function getPageLinks( $inputPages, $pageSet, $depth ) {
+               $fields = array( 'pl_namespace AS namespace', 'pl_title AS title' );
+               $joinConds = array( 'page_id=pl_from' );
+
+               $roundsLeft = $depth;
+               while( $roundsLeft > 0 ) {
+                       $pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $fields, $joinConds );
+                       // Everything found so far feeds the next round.
+                       $inputPages = array_keys( $pageSet );
+                       --$roundsLeft;
+               }
+               return $pageSet;
+       }
+       
+       /**
+        * Add the images used by the given pages to the page set.
+        * Delegates to getLinks() with the imagelinks table; every result is
+        * placed in the NS_FILE namespace.
+        * @param $inputPages array, list of titles to look up
+        * @param $pageSet array, associative array indexed by titles for output
+        * @return array associative array indexed by titles
+        */
+       private function getImages( $inputPages, $pageSet ) {
+               $fields = array( NS_FILE . ' AS namespace', 'il_to AS title' );
+               $joinConds = array( 'page_id=il_from' );
+
+               return $this->getLinks( $inputPages, $pageSet, 'imagelinks', $fields, $joinConds );
+       }
+       
+       /**
+        * Expand a list of pages to include items used in those pages.
+        *
+        * For every input page that parses as a title, the page itself and each
+        * row found for it in the given link table are added to the page set.
+        * @param $inputPages array, list of titles to look up
+        * @param $pageSet array, associative array indexed by titles for output
+        * @param $table string, link table queried together with 'page'
+        * @param $fields array, select fields, aliased to 'namespace' and 'title'
+        * @param $join array, conditions tying $table to the page table
+        * @return array associative array indexed by titles
+        * @private
+        */
+       private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
+               $db = wfGetDB( DB_SLAVE );
+               foreach( $inputPages as $name ) {
+                       $title = Title::newFromText( $name );
+                       if( !$title ) {
+                               // Input that does not yield a title is skipped.
+                               continue;
+                       }
+                       $pageSet[$title->getPrefixedText()] = true;
+
+                       $conds = array_merge( $join, array(
+                               'page_namespace' => $title->getNamespace(),
+                               'page_title' => $title->getDBKey(),
+                       ) );
+                       /// @fixme May or may not be more efficient to batch these
+                       ///        by namespace when given multiple input pages.
+                       $linked = $db->select( array( 'page', $table ), $fields, $conds, __METHOD__ );
+                       foreach( $linked as $row ) {
+                               $target = Title::makeTitle( $row->namespace, $row->title );
+                               $pageSet[$target->getPrefixedText()] = true;
+                       }
+               }
+               return $pageSet;
+       }
+       
+       /**
+        * array_filter() callback for the pages array: keeps every entry
+        * except empty strings and nulls.
+        * @param $page mixed, one entry of the pages array
+        * @return bool true when the entry should be kept
+        */
+       private function filterPage( $page ) {
+               return !( $page === '' || $page === null );
        }
-       $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
-       // Enable this when we can do something useful exporting/importing image information. :)
-       //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
-       $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
-
-       $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
-       $form .= Xml::closeElement( 'form' );
-       $wgOut->addHTML( $form );
 }
+