]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blobdiff - includes/api/ApiQueryCategoryMembers.php
MediaWiki 1.17.4
[autoinstalls/mediawiki.git] / includes / api / ApiQueryCategoryMembers.php
index 45461abd9672d3ee9b09899ece8207b9ee952911..14beee0a816a703d70fa33f162ea9a7c75f21e4d 100644 (file)
@@ -1,11 +1,10 @@
 <?php
-
-/*
- * Created on June 14, 2007
- *
+/**
  * API for MediaWiki 1.8+
  *
- * Copyright (C) 2006 Yuri Astrakhan <Firstname><Lastname>@gmail.com
+ * Created on June 14, 2007
+ *
+ * Copyright © 2006 Yuri Astrakhan <Firstname><Lastname>@gmail.com
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
  */
 
-if (!defined('MEDIAWIKI')) {
+if ( !defined( 'MEDIAWIKI' ) ) {
        // Eclipse helper - will be ignored in production
-       require_once ("ApiQueryBase.php");
+       require_once( "ApiQueryBase.php" );
 }
 
 /**
@@ -35,8 +36,8 @@ if (!defined('MEDIAWIKI')) {
  */
 class ApiQueryCategoryMembers extends ApiQueryGeneratorBase {
 
-       public function __construct($query, $moduleName) {
-               parent :: __construct($query, $moduleName, 'cm');
+       public function __construct( $query, $moduleName ) {
+               parent::__construct( $query, $moduleName, 'cm' );
        }
 
        public function execute() {
@@ -47,189 +48,258 @@ class ApiQueryCategoryMembers extends ApiQueryGeneratorBase {
                return 'public';
        }
 
-       public function executeGenerator($resultPageSet) {
-               $this->run($resultPageSet);
+       public function executeGenerator( $resultPageSet ) {
+               $this->run( $resultPageSet );
        }
 
-       private function run($resultPageSet = null) {
-
+       private function run( $resultPageSet = null ) {
                $params = $this->extractRequestParams();
 
-               if ( !isset($params['title']) || is_null($params['title']) )
-                       $this->dieUsage("The cmtitle parameter is required", 'notitle');
-               $categoryTitle = Title::newFromText($params['title']);
+               $categoryTitle = Title::newFromText( $params['title'] );
 
-               if ( is_null( $categoryTitle ) || $categoryTitle->getNamespace() != NS_CATEGORY )
-                       $this->dieUsage("The category name you entered is not valid", 'invalidcategory');
+               if ( is_null( $categoryTitle ) || $categoryTitle->getNamespace() != NS_CATEGORY ) {
+                       $this->dieUsage( 'The category name you entered is not valid', 'invalidcategory' );
+               }
 
-               $prop = array_flip($params['prop']);
-               $fld_ids = isset($prop['ids']);
-               $fld_title = isset($prop['title']);
-               $fld_sortkey = isset($prop['sortkey']);
-               $fld_timestamp = isset($prop['timestamp']);
+               $prop = array_flip( $params['prop'] );
+               $fld_ids = isset( $prop['ids'] );
+               $fld_title = isset( $prop['title'] );
+               $fld_sortkey = isset( $prop['sortkey'] );
+               $fld_sortkeyprefix = isset( $prop['sortkeyprefix'] );
+               $fld_timestamp = isset( $prop['timestamp'] );
+               $fld_type = isset( $prop['type'] );
 
-               if (is_null($resultPageSet)) {
-                       $this->addFields(array('cl_from', 'cl_sortkey', 'page_namespace', 'page_title'));
-                       $this->addFieldsIf('page_id', $fld_ids);
+               if ( is_null( $resultPageSet ) ) {
+                       $this->addFields( array( 'cl_from', 'cl_sortkey', 'cl_type', 'page_namespace', 'page_title' ) );
+                       $this->addFieldsIf( 'page_id', $fld_ids );
+                       $this->addFieldsIf( 'cl_sortkey_prefix', $fld_sortkeyprefix );
                } else {
-                       $this->addFields($resultPageSet->getPageTableFields()); // will include page_ id, ns, title
-                       $this->addFields(array('cl_from', 'cl_sortkey'));
+                       $this->addFields( $resultPageSet->getPageTableFields() ); // will include page_ id, ns, title
+                       $this->addFields( array( 'cl_from', 'cl_sortkey', 'cl_type' ) );
                }
 
-               $this->addFieldsIf('cl_timestamp', $fld_timestamp || $params['sort'] == 'timestamp');
-               $this->addTables(array('page','categorylinks'));        // must be in this order for 'USE INDEX'
-                                                                       // Not needed after bug 10280 is applied to servers
-               if($params['sort'] == 'timestamp')
-                       $this->addOption('USE INDEX', 'cl_timestamp');
-               else
-                       $this->addOption('USE INDEX', 'cl_sortkey');
+               $this->addFieldsIf( 'cl_timestamp', $fld_timestamp || $params['sort'] == 'timestamp' );
+
+               $this->addTables( array( 'page', 'categorylinks' ) );   // must be in this order for 'USE INDEX'
+
+               $this->addWhereFld( 'cl_to', $categoryTitle->getDBkey() );
+               $queryTypes = $params['type'];
+               $contWhere = false;
 
-               $this->addWhere('cl_from=page_id');
-               $this->setContinuation($params['continue'], $params['dir']);
-               $this->addWhereFld('cl_to', $categoryTitle->getDBkey());
-               $this->addWhereFld('page_namespace', $params['namespace']);
-               if($params['sort'] == 'timestamp')
-                       $this->addWhereRange('cl_timestamp', ($params['dir'] == 'asc' ? 'newer' : 'older'), $params['start'], $params['end']);
-               else
-               {
-                       $this->addWhereRange('cl_sortkey', ($params['dir'] == 'asc' ? 'newer' : 'older'), $params['startsortkey'], $params['endsortkey']);
-                       $this->addWhereRange('cl_from', ($params['dir'] == 'asc' ? 'newer' : 'older'), null, null);
+               // Scanning large datasets for rare categories sucks, and I already told
+               // how to have efficient subcategory access :-) ~~~~ (oh well, domas)
+               global $wgMiserMode;
+               $miser_ns = array();
+               if ( $wgMiserMode ) {
+                       $miser_ns = $params['namespace'];
+               } else {
+                       $this->addWhereFld( 'page_namespace', $params['namespace'] );
                }
 
-               $limit = $params['limit'];
-               $this->addOption('LIMIT', $limit +1);
+               $dir = $params['dir'] == 'asc' ? 'newer' : 'older';
+
+               if ( $params['sort'] == 'timestamp' ) {
+                       $this->addWhereRange( 'cl_timestamp',
+                               $dir,
+                               $params['start'],
+                               $params['end'] );
 
-               $db = $this->getDB();
+                       $this->addOption( 'USE INDEX', 'cl_timestamp' );
+               } else {
+                       if ( $params['continue'] ) {
+                               $cont = explode( '|', $params['continue'], 3 );
+                               if ( count( $cont ) != 3 ) {
+                                       $this->dieUsage( 'Invalid continue param. You should pass the original value returned '.
+                                               'by the previous query', '_badcontinue'
+                                       );
+                               }
+                               
+                               // Remove the types to skip from $queryTypes
+                               $contTypeIndex = array_search( $cont[0], $queryTypes );
+                               $queryTypes = array_slice( $queryTypes, $contTypeIndex );
+                               
+                               // Add a WHERE clause for sortkey and from
+                               // pack( "H*", $foo ) is used to convert hex back to binary
+                               $escSortkey = $this->getDB()->addQuotes( pack( "H*", $cont[1] ) );
+                               $from = intval( $cont[2] );
+                               $op = $dir == 'newer' ? '>' : '<';
+                               // $contWhere is used further down
+                               $contWhere = "cl_sortkey $op $escSortkey OR " .
+                                       "(cl_sortkey = $escSortkey AND " .
+                                       "cl_from $op= $from)";
+                               
+                       } else {
+                               // The below produces ORDER BY cl_sortkey, cl_from, possibly with DESC added to each of them
+                               $this->addWhereRange( 'cl_sortkey',
+                                       $dir,
+                                       $params['startsortkey'],
+                                       $params['endsortkey'] );
+                               $this->addWhereRange( 'cl_from', $dir, null, null );
+                       }
+                       $this->addOption( 'USE INDEX', 'cl_sortkey' );
+               }
+
+               $this->addWhere( 'cl_from=page_id' );
+
+               $limit = $params['limit'];
+               $this->addOption( 'LIMIT', $limit + 1 );
 
-               $data = array ();
+               if ( $params['sort'] == 'sortkey' ) {
+                       // Run a separate SELECT query for each value of cl_type.
+                       // This is needed because cl_type is an enum, and MySQL has
+                       // inconsistencies between ORDER BY cl_type and
+                       // WHERE cl_type >= 'foo' making proper paging impossible
+                       // and unindexed.
+                       $rows = array();
+                       $first = true;
+                       foreach ( $queryTypes as $type ) {
+                               $extraConds = array( 'cl_type' => $type );
+                               if ( $first && $contWhere ) {
+                                       // Continuation condition. Only added to the
+                                       // first query, otherwise we'll skip things
+                                       $extraConds[] = $contWhere;
+                               }
+                               $res = $this->select( __METHOD__, array( 'where' => $extraConds ) );
+                               $rows = array_merge( $rows, iterator_to_array( $res ) );
+                               if ( count( $rows ) >= $limit + 1 ) {
+                                       break;
+                               }
+                               $first = false;
+                       }
+               } else {
+                       // Sorting by timestamp
+                       // No need to worry about per-type queries because we
+                       // aren't sorting or filtering by type anyway
+                       $res = $this->select( __METHOD__ );
+                       $rows = iterator_to_array( $res );
+               }
                $count = 0;
-               $lastSortKey = null;
-               $res = $this->select(__METHOD__);
-               while ($row = $db->fetchObject($res)) {
-                       if (++ $count > $limit) {
+               foreach ( $rows as $row ) {
+                       if ( ++ $count > $limit ) {
                                // We've reached the one extra which shows that there are additional pages to be had. Stop here...
                                // TODO: Security issue - if the user has no right to view next title, it will still be shown
-                               if ($params['sort'] == 'timestamp')
-                                       $this->setContinueEnumParameter('start', wfTimestamp(TS_ISO_8601, $row->cl_timestamp));
-                               else
-                                       $this->setContinueEnumParameter('continue', $this->getContinueStr($row, $lastSortKey));
+                               if ( $params['sort'] == 'timestamp' ) {
+                                       $this->setContinueEnumParameter( 'start', wfTimestamp( TS_ISO_8601, $row->cl_timestamp ) );
+                               } else {
+                                       $sortkey = bin2hex( $row->cl_sortkey );
+                                       $this->setContinueEnumParameter( 'continue',
+                                               "{$row->cl_type}|$sortkey|{$row->cl_from}"
+                                       );
+                               }
                                break;
                        }
 
-                       if (is_null($resultPageSet)) {
+                       // Since domas won't tell anyone what he told long ago, apply
+                       // cmnamespace here. This means the query may return 0 actual
+                       // results, but on the other hand it could save returning 5000
+                       // useless results to the client. ~~~~
+                       if ( count( $miser_ns ) && !in_array( $row->page_namespace, $miser_ns ) ) {
+                               continue;
+                       }
+
+                       if ( is_null( $resultPageSet ) ) {
                                $vals = array();
-                               if ($fld_ids)
-                                       $vals['pageid'] = intval($row->page_id);
-                               if ($fld_title) {
-                                       $title = Title :: makeTitle($row->page_namespace, $row->page_title);
-                                       ApiQueryBase::addTitleInfo($vals, $title);
+                               if ( $fld_ids ) {
+                                       $vals['pageid'] = intval( $row->page_id );
+                               }
+                               if ( $fld_title ) {
+                                       $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+                                       ApiQueryBase::addTitleInfo( $vals, $title );
+                               }
+                               if ( $fld_sortkey ) {
+                                       $vals['sortkey'] = bin2hex( $row->cl_sortkey );
+                               }
+                               if ( $fld_sortkeyprefix ) {
+                                       $vals['sortkeyprefix'] = $row->cl_sortkey_prefix;
+                               }
+                               if ( $fld_type  ) {
+                                       $vals['type'] = $row->cl_type;
                                }
-                               if ($fld_sortkey)
-                                       $vals['sortkey'] = $row->cl_sortkey;
-                               if ($fld_timestamp)
-                                       $vals['timestamp'] = wfTimestamp(TS_ISO_8601, $row->cl_timestamp);
-                               $fit = $this->getResult()->addValue(array('query', $this->getModuleName()),
-                                               null, $vals);
-                               if(!$fit)
-                               {
-                                       if ($params['sort'] == 'timestamp')
-                                               $this->setContinueEnumParameter('start', wfTimestamp(TS_ISO_8601, $row->cl_timestamp));
-                                       else
-                                               $this->setContinueEnumParameter('continue', $this->getContinueStr($row, $lastSortKey));
+                               if ( $fld_timestamp ) {
+                                       $vals['timestamp'] = wfTimestamp( TS_ISO_8601, $row->cl_timestamp );
+                               }
+                               $fit = $this->getResult()->addValue( array( 'query', $this->getModuleName() ),
+                                               null, $vals );
+                               if ( !$fit ) {
+                                       if ( $params['sort'] == 'timestamp' ) {
+                                               $this->setContinueEnumParameter( 'start', wfTimestamp( TS_ISO_8601, $row->cl_timestamp ) );
+                                       } else {
+                                               $sortkey = bin2hex( $row->cl_sortkey );
+                                               $this->setContinueEnumParameter( 'continue',
+                                                       "{$row->cl_type}|$sortkey|{$row->cl_from}"
+                                               );
+                                       }
                                        break;
                                }
                        } else {
-                               $resultPageSet->processDbRow($row);
+                               $resultPageSet->processDbRow( $row );
                        }
-                       $lastSortKey = $row->cl_sortkey;        // detect duplicate sortkeys
                }
-               $db->freeResult($res);
 
-               if (is_null($resultPageSet)) {
+               if ( is_null( $resultPageSet ) ) {
                        $this->getResult()->setIndexedTagName_internal(
-                                        array('query', $this->getModuleName()), 'cm');
-               }
-       }
-
-       private function getContinueStr($row, $lastSortKey) {
-               $ret = $row->cl_sortkey . '|';
-               if ($row->cl_sortkey == $lastSortKey)   // duplicate sort key, add cl_from
-                       $ret .= $row->cl_from;
-               return $ret;
-       }
-
-       /**
-        * Add DB WHERE clause to continue previous query based on 'continue' parameter
-        */
-       private function setContinuation($continue, $dir) {
-               if (is_null($continue))
-                       return; // This is not a continuation request
-
-               $pos = strrpos($continue, '|');
-               $sortkey = substr($continue, 0, $pos);
-               $fromstr = substr($continue, $pos + 1);
-               $from = intval($fromstr);
-
-               if ($from == 0 && strlen($fromstr) > 0)
-                       $this->dieUsage("Invalid continue param. You should pass the original value returned by the previous query", "badcontinue");
-
-               $encSortKey = $this->getDB()->addQuotes($sortkey);
-               $encFrom = $this->getDB()->addQuotes($from);
-               
-               $op = ($dir == 'desc' ? '<' : '>');
-
-               if ($from != 0) {
-                       // Duplicate sort key continue
-                       $this->addWhere( "cl_sortkey$op$encSortKey OR (cl_sortkey=$encSortKey AND cl_from$op=$encFrom)" );
-               } else {
-                       $this->addWhere( "cl_sortkey$op=$encSortKey" );
+                                        array( 'query', $this->getModuleName() ), 'cm' );
                }
        }
 
        public function getAllowedParams() {
-               return array (
-                       'title' => null,
-                       'prop' => array (
-                               ApiBase :: PARAM_DFLT => 'ids|title',
-                               ApiBase :: PARAM_ISMULTI => true,
-                               ApiBase :: PARAM_TYPE => array (
+               return array(
+                       'title' => array(
+                               ApiBase::PARAM_TYPE => 'string',
+                               ApiBase::PARAM_REQUIRED => true
+                       ),
+                       'prop' => array(
+                               ApiBase::PARAM_DFLT => 'ids|title',
+                               ApiBase::PARAM_ISMULTI => true,
+                               ApiBase::PARAM_TYPE => array (
                                        'ids',
                                        'title',
                                        'sortkey',
+                                       'sortkeyprefix',
+                                       'type',
                                        'timestamp',
                                )
                        ),
                        'namespace' => array (
-                               ApiBase :: PARAM_ISMULTI => true,
-                               ApiBase :: PARAM_TYPE => 'namespace',
+                               ApiBase::PARAM_ISMULTI => true,
+                               ApiBase::PARAM_TYPE => 'namespace',
+                       ),
+                       'type' => array(
+                               ApiBase::PARAM_ISMULTI => true,
+                               ApiBase::PARAM_DFLT => 'page|subcat|file',
+                               ApiBase::PARAM_TYPE => array(
+                                       'page',
+                                       'subcat',
+                                       'file'
+                               )
                        ),
                        'continue' => null,
-                       'limit' => array (
-                               ApiBase :: PARAM_TYPE => 'limit',
-                               ApiBase :: PARAM_DFLT => 10,
-                               ApiBase :: PARAM_MIN => 1,
-                               ApiBase :: PARAM_MAX => ApiBase :: LIMIT_BIG1,
-                               ApiBase :: PARAM_MAX2 => ApiBase :: LIMIT_BIG2
+                       'limit' => array(
+                               ApiBase::PARAM_TYPE => 'limit',
+                               ApiBase::PARAM_DFLT => 10,
+                               ApiBase::PARAM_MIN => 1,
+                               ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
+                               ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
                        ),
                        'sort' => array(
-                               ApiBase :: PARAM_DFLT => 'sortkey',
-                               ApiBase :: PARAM_TYPE => array(
+                               ApiBase::PARAM_DFLT => 'sortkey',
+                               ApiBase::PARAM_TYPE => array(
                                        'sortkey',
                                        'timestamp'
                                )
                        ),
                        'dir' => array(
-                               ApiBase :: PARAM_DFLT => 'asc',
-                               ApiBase :: PARAM_TYPE => array(
+                               ApiBase::PARAM_DFLT => 'asc',
+                               ApiBase::PARAM_TYPE => array(
                                        'asc',
                                        'desc'
                                )
                        ),
                        'start' => array(
-                               ApiBase :: PARAM_TYPE => 'timestamp'
+                               ApiBase::PARAM_TYPE => 'timestamp'
                        ),
                        'end' => array(
-                               ApiBase :: PARAM_TYPE => 'timestamp'
+                               ApiBase::PARAM_TYPE => 'timestamp'
                        ),
                        'startsortkey' => null,
                        'endsortkey' => null,
@@ -237,35 +307,63 @@ class ApiQueryCategoryMembers extends ApiQueryGeneratorBase {
        }
 
        public function getParamDescription() {
-               return array (
+               global $wgMiserMode;
+               $p = $this->getModulePrefix();
+               $desc = array(
                        'title' => 'Which category to enumerate (required). Must include Category: prefix',
-                       'prop' => 'What pieces of information to include',
+                       'prop' => array(
+                               'What pieces of information to include',
+                               ' ids           - Adds the page ID',
+                               ' title         - Adds the title and namespace ID of the page',
+                               ' sortkey       - Adds the sortkey used for sorting in the category (hexadecimal string)',
+                               ' sortkeyprefix - Adds the sortkey prefix used for sorting in the category (human-readable part of the sortkey)',
+                               ' type          - Adds the type that the page has been categorised as (page, subcat or file)',
+                               ' timestamp     - Adds the timestamp of when the page was included',
+                       ),
                        'namespace' => 'Only include pages in these namespaces',
+                       'type' => "What type of category members to include. Ignored when {$p}sort=timestamp is set",
                        'sort' => 'Property to sort by',
                        'dir' => 'In which direction to sort',
-                       'start' => 'Timestamp to start listing from. Can only be used with cmsort=timestamp',
-                       'end' => 'Timestamp to end listing at. Can only be used with cmsort=timestamp',
-                       'startsortkey' => 'Sortkey to start listing from. Can only be used with cmsort=sortkey',
-                       'endsortkey' => 'Sortkey to end listing at. Can only be used with cmsort=sortkey',
+                       'start' => "Timestamp to start listing from. Can only be used with {$p}sort=timestamp",
+                       'end' => "Timestamp to end listing at. Can only be used with {$p}sort=timestamp",
+                       'startsortkey' => "Sortkey to start listing from. Can only be used with {$p}sort=sortkey",
+                       'endsortkey' => "Sortkey to end listing at. Can only be used with {$p}sort=sortkey",
                        'continue' => 'For large categories, give the value retured from previous query',
                        'limit' => 'The maximum number of pages to return.',
                );
+               if ( $wgMiserMode ) {
+                       $desc['namespace'] = array(
+                               $desc['namespace'],
+                               'NOTE: Due to $wgMiserMode, using this may result in fewer than "limit" results',
+                               'returned before continuing; in extreme cases, zero results may be returned.',
+                               'Note that you can use cmtype=subcat or cmtype=file instead of cmnamespace=14 or 6.',
+                       );
+               }
+               return $desc;
        }
 
        public function getDescription() {
                return 'List all pages in a given category';
        }
 
+       public function getPossibleErrors() {
+               return array_merge( parent::getPossibleErrors(), array(
+                       array( 'code' => 'notitle', 'info' => 'The cmtitle parameter is required' ),
+                       array( 'code' => 'invalidcategory', 'info' => 'The category name you entered is not valid' ),
+                       array( 'code' => 'badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
+               ) );
+       }
+
        protected function getExamples() {
-               return array (
-                               "Get first 10 pages in [[Category:Physics]]:",
-                               "  api.php?action=query&list=categorymembers&cmtitle=Category:Physics",
-                               "Get page info about first 10 pages in [[Category:Physics]]:",
-                               "  api.php?action=query&generator=categorymembers&gcmtitle=Category:Physics&prop=info",
-                       );
+               return array(
+                       'Get first 10 pages in [[Category:Physics]]:',
+                       '  api.php?action=query&list=categorymembers&cmtitle=Category:Physics',
+                       'Get page info about first 10 pages in [[Category:Physics]]:',
+                       '  api.php?action=query&generator=categorymembers&gcmtitle=Category:Physics&prop=info',
+               );
        }
 
        public function getVersion() {
-               return __CLASS__ . ': $Id: ApiQueryCategoryMembers.php 69986 2010-07-27 03:57:39Z tstarling $';
+               return __CLASS__ . ': $Id$';
        }
 }