]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blobdiff - includes/filerepo/ForeignAPIRepo.php
MediaWiki 1.17.0
[autoinstallsdev/mediawiki.git] / includes / filerepo / ForeignAPIRepo.php
index 264cb920f884966ada9bac30e8810c7f88d566a7..e4188d6be99f252dd638ca286d2ae9a492b5ecbd 100644 (file)
@@ -1,9 +1,13 @@
 <?php
+/**
+ * Foreign repository accessible through api.php requests.
+ *
+ * @file
+ * @ingroup FileRepo
+ */
 
 /**
  * A foreign repository with a remote MediaWiki with an API thingy
- * Very hacky and inefficient
- * do not use except for testing :D
  *
  * Example config:
  *
  * @ingroup FileRepo
  */
 class ForeignAPIRepo extends FileRepo {
+       /* This version string is used in the user agent for requests and will help
+        * server maintainers in identify ForeignAPI usage.
+        * Update the version every time you make breaking or significant changes. */
+       const VERSION = "2.0";
+
        var $fileFactory = array( 'ForeignAPIFile', 'newFromTitle' );
+       /* Check back with Commons after a day */
        var $apiThumbCacheExpiry = 86400;
+       /* Redownload thumbnail files after a month */
+       var $fileCacheExpiry = 2629743;
+       /* Local image directory */
+       var $directory;
+       var $thumbDir;
+
        protected $mQueryCache = array();
        protected $mFileExists = array();
 
        function __construct( $info ) {
                parent::__construct( $info );
-               $this->mApiBase = $info['apibase']; // http://commons.wikimedia.org/w/api.php
+               global $wgUploadDirectory;
+
+               // http://commons.wikimedia.org/w/api.php
+               $this->mApiBase = isset( $info['apibase'] ) ? $info['apibase'] : null;
+               $this->directory = isset( $info['directory'] ) ? $info['directory'] : $wgUploadDirectory;
+
                if( isset( $info['apiThumbCacheExpiry'] ) ) {
                        $this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
                }
+               if( isset( $info['fileCacheExpiry'] ) ) {
+                       $this->fileCacheExpiry = $info['fileCacheExpiry'];
+               }
                if( !$this->scriptDirUrl ) {
                        // hack for description fetches
                        $this->scriptDirUrl = dirname( $this->mApiBase );
@@ -41,6 +65,11 @@ class ForeignAPIRepo extends FileRepo {
                if( $this->canCacheThumbs() && !$this->thumbUrl ) {
                        $this->thumbUrl = $this->url . '/thumb';
                }
+               if ( isset( $info['thumbDir'] ) ) {
+                       $this->thumbDir =  $info['thumbDir'];
+               } else {
+                       $this->thumbDir = "{$this->directory}/thumb";
+               }
        }
 
        /**
@@ -89,7 +118,7 @@ class ForeignAPIRepo extends FileRepo {
                        }
                }
 
-               $results = $this->fetchImageQuery( array( 'titles' => implode( $files, '|' ),
+               $data = $this->fetchImageQuery( array( 'titles' => implode( $files, '|' ),
                                                                                        'prop' => 'imageinfo' ) );
                if( isset( $data['query']['pages'] ) ) {
                        $i = 0;
@@ -98,40 +127,32 @@ class ForeignAPIRepo extends FileRepo {
                                $i++;
                        }
                }
+               return $results;
        }
        function getFileProps( $virtualUrl ) {
                return false;
        }
 
-       protected function queryImage( $query ) {
-               $data = $this->fetchImageQuery( $query );
-
-               if( isset( $data['query']['pages'] ) ) {
-                       foreach( $data['query']['pages'] as $pageid => $info ) {
-                               if( isset( $info['imageinfo'][0] ) ) {
-                                       return $info['imageinfo'][0];
-                               }
-                       }
-               }
-               return false;
-       }
-
-       protected function fetchImageQuery( $query ) {
+       function fetchImageQuery( $query ) {
                global $wgMemc;
 
-               $url = $this->mApiBase .
-                       '?' .
-                       wfArrayToCgi(
-                               array_merge( $query,
-                                       array(
-                                               'format' => 'json',
-                                               'action' => 'query' ) ) );
+               $query = array_merge( $query,
+                       array(
+                               'format' => 'json',
+                               'action' => 'query',
+                               'redirects' => 'true'
+                       ) );
+               if ( $this->mApiBase ) {
+                       $url = wfAppendQuery( $this->mApiBase, $query );
+               } else {
+                       $url = $this->makeUrl( $query, 'api' );
+               }
 
                if( !isset( $this->mQueryCache[$url] ) ) {
                        $key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'Metadata', md5( $url ) );
                        $data = $wgMemc->get( $key );
                        if( !$data ) {
-                               $data = Http::get( $url );
+                               $data = self::httpGet( $url );
                                if ( !$data ) {
                                        return null;
                                }
@@ -147,81 +168,141 @@ class ForeignAPIRepo extends FileRepo {
                return FormatJson::decode( $this->mQueryCache[$url], true );
        }
 
-       function getImageInfo( $title, $time = false ) {
-               return $this->queryImage( array(
-                       'titles' => 'Image:' . $title->getText(),
-                       'iiprop' => 'timestamp|user|comment|url|size|sha1|metadata|mime',
-                       'prop' => 'imageinfo' ) );
+       function getImageInfo( $data ) {
+               if( $data && isset( $data['query']['pages'] ) ) {
+                       foreach( $data['query']['pages'] as $info ) {
+                               if( isset( $info['imageinfo'][0] ) ) {
+                                       return $info['imageinfo'][0];
+                               }
+                       }
+               }
+               return false;
        }
 
        function findBySha1( $hash ) {
                $results = $this->fetchImageQuery( array(
                                                                                'aisha1base36' => $hash,
-                                                                               'aiprop'       => 'timestamp|user|comment|url|size|sha1|metadata|mime',
+                                                                               'aiprop'       => ForeignAPIFile::getProps(),
                                                                                'list'         => 'allimages', ) );
                $ret = array();
                if ( isset( $results['query']['allimages'] ) ) {
                        foreach ( $results['query']['allimages'] as $img ) {
+                               // 1.14 was broken, doesn't return name attribute
+                               if( !isset( $img['name'] ) ) {
+                                       continue;
+                               }
                                $ret[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
                        }
                }
                return $ret;
        }
 
-       function getThumbUrl( $name, $width=-1, $height=-1 ) {
-               $info = $this->queryImage( array(
-                       'titles' => 'Image:' . $name,
-                       'iiprop' => 'url',
+       function getThumbUrl( $name, $width=-1, $height=-1, &$result=NULL ) {
+               $data = $this->fetchImageQuery( array(
+                       'titles' => 'File:' . $name,
+                       'iiprop' => 'url|timestamp',
                        'iiurlwidth' => $width,
                        'iiurlheight' => $height,
                        'prop' => 'imageinfo' ) );
-               if( $info && $info['thumburl'] ) {
+               $info = $this->getImageInfo( $data );
+
+               if( $data && $info && isset( $info['thumburl'] ) ) {
                        wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] . "\n" );
+                       $result = $info;
                        return $info['thumburl'];
                } else {
                        return false;
                }
        }
 
+       /*
+        * Return the imageurl from cache if possible
+        *
+        * If the url has been requested today, get it from cache
+        * Otherwise retrieve remote thumb url, check for local file.
+        *
+        * @param $name String is a dbkey form of a title
+        * @param $width
+        * @param $height
+        */
        function getThumbUrlFromCache( $name, $width, $height ) {
-               global $wgMemc, $wgUploadPath, $wgServer, $wgUploadDirectory;
+               global $wgMemc;
 
                if ( !$this->canCacheThumbs() ) {
                        return $this->getThumbUrl( $name, $width, $height );
                }
-
                $key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $name );
-               if ( $thumbUrl = $wgMemc->get($key) ) {
-                       wfDebug("Got thumb from local cache. $thumbUrl \n");
-                       return $thumbUrl;
-               }
-               else {
-                       $foreignUrl = $this->getThumbUrl( $name, $width, $height );
-                       if( !$foreignUrl ) {
-                               wfDebug( __METHOD__ . " Could not find thumburl\n" );
-                               return false;
-                       }
-                       $thumb = Http::get( $foreignUrl );
-                       if( !$thumb ) {
-                               wfDebug( __METHOD__ . " Could not download thumb\n" );
-                               return false;
+               $sizekey = "$width:$height";
+
+               /* Get the array of urls that we already know */
+               $knownThumbUrls = $wgMemc->get($key);
+               if( !$knownThumbUrls ) {
+                       /* No knownThumbUrls for this file */
+                       $knownThumbUrls = array();
+               } else {
+                       if( isset( $knownThumbUrls[$sizekey] ) ) {
+                               wfDebug("Got thumburl from local cache. {$knownThumbUrls[$sizekey]} \n");
+                               return $knownThumbUrls[$sizekey];
                        }
-                       // We need the same filename as the remote one :)
-                       $fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
-                       $path = 'thumb/' . $this->getHashPath( $name ) . $name . "/";
-                       if ( !is_dir($wgUploadDirectory . '/' . $path) ) {
-                               wfMkdirParents($wgUploadDirectory . '/' . $path);
+                       /* This size is not yet known */
+               }
+
+               $metadata = null;
+               $foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata );
+
+               if( !$foreignUrl ) {
+                       wfDebug( __METHOD__ . " Could not find thumburl\n" );
+                       return false;
+               }
+
+               // We need the same filename as the remote one :)
+               $fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
+               if( !$this->validateFilename( $fileName ) ) {
+                       wfDebug( __METHOD__ . " The deduced filename $fileName is not safe\n" );
+                       return false;
+               }
+               $localPath =  $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
+               $localFilename = $localPath . "/" . $fileName;
+               $localUrl =  $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) . rawurlencode( $name ) . "/" . rawurlencode( $fileName );
+
+               if( file_exists( $localFilename ) && isset( $metadata['timestamp'] ) ) {
+                       wfDebug( __METHOD__ . " Thumbnail was already downloaded before\n" );
+                       $modified = filemtime( $localFilename );
+                       $remoteModified = strtotime( $metadata['timestamp'] );
+                       $current = time();
+                       $diff = abs( $modified - $current );
+                       if( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
+                               /* Use our current and already downloaded thumbnail */
+                               $knownThumbUrls["$width:$height"] = $localUrl;
+                               $wgMemc->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
+                               return $localUrl;
                        }
-                       $localUrl =  $wgServer . $wgUploadPath . '/' . $path . $fileName;
-                       # FIXME: Delete old thumbs that aren't being used. Maintenance script?
-                       if( !file_put_contents($wgUploadDirectory . '/' . $path . $fileName, $thumb ) ) {
-                               wfDebug( __METHOD__ . " could not write to thumb path\n" );
+                       /* There is a new Commons file, or existing thumbnail older than a month */
+               }
+               $thumb = self::httpGet( $foreignUrl );
+               if( !$thumb ) {
+                       wfDebug( __METHOD__ . " Could not download thumb\n" );
+                       return false;
+               }
+               if ( !is_dir($localPath) ) {
+                       if( !wfMkdirParents($localPath) ) {
+                               wfDebug(  __METHOD__ . " could not create directory $localPath for thumb\n" );
                                return $foreignUrl;
                        }
-                       $wgMemc->set( $key, $localUrl, $this->apiThumbCacheExpiry );
-                       wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache \n" );
-                       return $localUrl;
                }
+
+               # FIXME: Delete old thumbs that aren't being used. Maintenance script?
+               wfSuppressWarnings();
+               if( !file_put_contents( $localFilename, $thumb ) ) {
+                       wfRestoreWarnings();
+                       wfDebug( __METHOD__ . " could not write to thumb path\n" );
+                       return $foreignUrl;
+               }
+               wfRestoreWarnings();
+               $knownThumbUrls[$sizekey] = $localUrl;
+               $wgMemc->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
+               wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache \n" );
+               return $localUrl;
        }
 
        /**
@@ -238,6 +319,20 @@ class ForeignAPIRepo extends FileRepo {
                }
        }
 
+       /**
+        * Get the local directory corresponding to one of the three basic zones
+        */
+       function getZonePath( $zone ) {
+               switch ( $zone ) {
+                       case 'public':
+                               return $this->directory;
+                       case 'thumb':
+                               return $this->thumbDir;
+                       default:
+                               return false;
+               }
+       }
+
        /**
         * Are we locally caching the thumbnails?
         * @return bool
@@ -245,4 +340,37 @@ class ForeignAPIRepo extends FileRepo {
        public function canCacheThumbs() {
                return ( $this->apiThumbCacheExpiry > 0 );
        }
+
+       /**
+        * The user agent the ForeignAPIRepo will use.
+        */
+       public static function getUserAgent() {
+               return Http::userAgent() . " ForeignAPIRepo/" . self::VERSION;
+       }
+
+       /**
+        * Like a Http:get request, but with custom User-Agent.
+        * @see Http:get
+        */
+       public static function httpGet( $url, $timeout = 'default', $options = array() ) {
+               $options['timeout'] = $timeout;
+               /* Http::get */
+               $url = wfExpandUrl( $url );
+               wfDebug( "ForeignAPIRepo: HTTP GET: $url\n" );
+               $options['method'] = "GET";
+
+               if ( !isset( $options['timeout'] ) ) {
+                       $options['timeout'] = 'default';
+               }
+
+               $req = MWHttpRequest::factory( $url, $options );
+               $req->setUserAgent( ForeignAPIRepo::getUserAgent() );
+               $status = $req->execute();
+
+               if ( $status->isOK() ) {
+                       return $req->getContent();
+               } else {
+                       return false;
+               }
+       }
 }