]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - maintenance/importImages.php
MediaWiki 1.30.2-scripts2
[autoinstallsdev/mediawiki.git] / maintenance / importImages.php
1 <?php
2 /**
3  * Import one or more images from the local file system into the wiki without
4  * using the web-based interface.
5  *
6  * "Smart import" additions:
7  * - aim: preserve the essential metadata (user, description) when importing media
8  *   files from an existing wiki.
9  * - process:
10  *      - interface with the source wiki, don't use bare files only (see --source-wiki-url).
11  *      - fetch metadata from source wiki for each file to import.
12  *      - commit the fetched metadata to the destination wiki while submitting.
13  *
14  * This program is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU General Public License as published by
16  * the Free Software Foundation; either version 2 of the License, or
17  * (at your option) any later version.
18  *
19  * This program is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU General Public License for more details.
23  *
24  * You should have received a copy of the GNU General Public License along
25  * with this program; if not, write to the Free Software Foundation, Inc.,
26  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27  * http://www.gnu.org/copyleft/gpl.html
28  *
29  * @file
30  * @ingroup Maintenance
31  * @author Rob Church <robchur@gmail.com>
32  * @author Mij <mij@bitchx.it>
33  */
34
35 require_once __DIR__ . '/Maintenance.php';
36
37 class ImportImages extends Maintenance {
38
39         public function __construct() {
40                 parent::__construct();
41
42                 $this->addDescription( 'Imports images and other media files into the wiki' );
43                 $this->addArg( 'dir', 'Path to the directory containing images to be imported' );
44
45                 $this->addOption( 'extensions',
46                         'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
47                         false,
48                         true
49                 );
50                 $this->addOption( 'overwrite',
51                         'Overwrite existing images with the same name (default is to skip them)' );
52                 $this->addOption( 'limit',
53                         'Limit the number of images to process. Ignored or skipped images are not counted',
54                         false,
55                         true
56                 );
57                 $this->addOption( 'from',
58                         "Ignore all files until the one with the given name. Useful for resuming aborted "
59                                 . "imports. The name should be the file's canonical database form.",
60                         false,
61                         true
62                 );
63                 $this->addOption( 'skip-dupes',
64                         'Skip images that were already uploaded under a different name (check SHA1)' );
65                 $this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
66                 $this->addOption( 'sleep',
67                         'Sleep between files. Useful mostly for debugging',
68                         false,
69                         true
70                 );
71                 $this->addOption( 'user',
72                         "Set username of uploader, default 'Maintenance script'",
73                         false,
74                         true
75                 );
76                 // This parameter can optionally have an argument. If none specified, getOption()
77                 // returns 1 which is precisely what we need.
78                 $this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
79                 $this->addOption( 'comment',
80                         "Set file description, default 'Importing file'",
81                         false,
82                         true
83                 );
84                 $this->addOption( 'comment-file',
85                         'Set description to the content of this file',
86                         false,
87                         true
88                 );
89                 $this->addOption( 'comment-ext',
90                         'Causes the description for each file to be loaded from a file with the same name, but '
91                                 . 'the extension provided. If a global description is also given, it is appended.',
92                         false,
93                         true
94                 );
95                 $this->addOption( 'summary',
96                         'Upload summary, description will be used if not provided',
97                         false,
98                         true
99                 );
100                 $this->addOption( 'license',
101                         'Use an optional license template',
102                         false,
103                         true
104                 );
105                 $this->addOption( 'timestamp',
106                         'Override upload time/date, all MediaWiki timestamp formats are accepted',
107                         false,
108                         true
109                 );
110                 $this->addOption( 'protect',
111                         'Specify the protect value (autoconfirmed,sysop)',
112                         false,
113                         true
114                 );
115                 $this->addOption( 'unprotect', 'Unprotects all uploaded images' );
116                 $this->addOption( 'source-wiki-url',
117                         'If specified, take User and Comment data for each imported file from this URL. '
118                                 . 'For example, --source-wiki-url="http://en.wikipedia.org/',
119                         false,
120                         true
121                 );
122                 $this->addOption( 'dry', "Dry run, don't import anything" );
123         }
124
125         public function execute() {
126                 global $wgFileExtensions, $wgUser, $wgRestrictionLevels;
127
128                 $processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
129
130                 $this->output( "Import Images\n\n" );
131
132                 $dir = $this->getArg( 0 );
133
134                 # Check Protection
135                 if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
136                         $this->error( "Cannot specify both protect and unprotect.  Only 1 is allowed.\n", 1 );
137                 }
138
139                 if ( $this->hasOption( 'protect' ) && trim( $this->getOption( 'protect' ) ) ) {
140                         $this->error( "You must specify a protection option.\n", 1 );
141                 }
142
143                 # Prepare the list of allowed extensions
144                 $extensions = $this->hasOption( 'extensions' )
145                         ? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
146                         : $wgFileExtensions;
147
148                 # Search the path provided for candidates for import
149                 $files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
150
151                 # Initialise the user for this operation
152                 $user = $this->hasOption( 'user' )
153                         ? User::newFromName( $this->getOption( 'user' ) )
154                         : User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
155                 if ( !$user instanceof User ) {
156                         $user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
157                 }
158                 $wgUser = $user;
159
160                 # Get block check. If a value is given, this specified how often the check is performed
161                 $checkUserBlock = (int)$this->getOption( 'check-userblock' );
162
163                 $from = $this->getOption( 'from' );
164                 $sleep = (int)$this->getOption( 'sleep' );
165                 $limit = (int)$this->getOption( 'limit' );
166                 $timestamp = $this->getOption( 'timestamp', false );
167
168                 # Get the upload comment. Provide a default one in case there's no comment given.
169                 $commentFile = $this->getOption( 'comment-file' );
170                 if ( $commentFile !== null ) {
171                         $comment = file_get_contents( $commentFile );
172                         if ( $comment === false || $comment === null ) {
173                                 $this->error( "failed to read comment file: {$commentFile}\n", 1 );
174                         }
175                 } else {
176                         $comment = $this->getOption( 'comment', 'Importing file' );
177                 }
178                 $commentExt = $this->getOption( 'comment-ext' );
179                 $summary = $this->getOption( 'summary', '' );
180
181                 $license = $this->getOption( 'license', '' );
182
183                 $sourceWikiUrl = $this->getOption( 'source-wiki-url' );
184
185                 # Batch "upload" operation
186                 $count = count( $files );
187                 if ( $count > 0 ) {
188                         foreach ( $files as $file ) {
189                                 if ( $sleep && ( $processed > 0 ) ) {
190                                         sleep( $sleep );
191                                 }
192
193                                 $base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );
194
195                                 # Validate a title
196                                 $title = Title::makeTitleSafe( NS_FILE, $base );
197                                 if ( !is_object( $title ) ) {
198                                         $this->output(
199                                                 "{$base} could not be imported; a valid title cannot be produced\n" );
200                                         continue;
201                                 }
202
203                                 if ( $from ) {
204                                         if ( $from == $title->getDBkey() ) {
205                                                 $from = null;
206                                         } else {
207                                                 $ignored++;
208                                                 continue;
209                                         }
210                                 }
211
212                                 if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
213                                         $user->clearInstanceCache( 'name' ); // reload from DB!
214                                         if ( $user->isBlocked() ) {
215                                                 $this->output( $user->getName() . " was blocked! Aborting.\n" );
216                                                 break;
217                                         }
218                                 }
219
220                                 # Check existence
221                                 $image = wfLocalFile( $title );
222                                 if ( $image->exists() ) {
223                                         if ( $this->hasOption( 'overwrite' ) ) {
224                                                 $this->output( "{$base} exists, overwriting..." );
225                                                 $svar = 'overwritten';
226                                         } else {
227                                                 $this->output( "{$base} exists, skipping\n" );
228                                                 $skipped++;
229                                                 continue;
230                                         }
231                                 } else {
232                                         if ( $this->hasOption( 'skip-dupes' ) ) {
233                                                 $repo = $image->getRepo();
234                                                 # XXX: we end up calculating this again when actually uploading. that sucks.
235                                                 $sha1 = FSFile::getSha1Base36FromPath( $file );
236
237                                                 $dupes = $repo->findBySha1( $sha1 );
238
239                                                 if ( $dupes ) {
240                                                         $this->output(
241                                                                 "{$base} already exists as {$dupes[0]->getName()}, skipping\n" );
242                                                         $skipped++;
243                                                         continue;
244                                                 }
245                                         }
246
247                                         $this->output( "Importing {$base}..." );
248                                         $svar = 'added';
249                                 }
250
251                                 if ( $sourceWikiUrl ) {
252                                         /* find comment text directly from source wiki, through MW's API */
253                                         $real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
254                                         if ( $real_comment === false ) {
255                                                 $commentText = $comment;
256                                         } else {
257                                                 $commentText = $real_comment;
258                                         }
259
260                                         /* find user directly from source wiki, through MW's API */
261                                         $real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
262                                         if ( $real_user === false ) {
263                                                 $wgUser = $user;
264                                         } else {
265                                                 $wgUser = User::newFromName( $real_user );
266                                                 if ( $wgUser === false ) {
267                                                         # user does not exist in target wiki
268                                                         $this->output(
269                                                                 "failed: user '$real_user' does not exist in target wiki." );
270                                                         continue;
271                                                 }
272                                         }
273                                 } else {
274                                         # Find comment text
275                                         $commentText = false;
276
277                                         if ( $commentExt ) {
278                                                 $f = $this->findAuxFile( $file, $commentExt );
279                                                 if ( !$f ) {
280                                                         $this->output( " No comment file with extension {$commentExt} found "
281                                                                  . "for {$file}, using default comment. " );
282                                                 } else {
283                                                         $commentText = file_get_contents( $f );
284                                                         if ( !$commentText ) {
285                                                                 $this->output(
286                                                                         " Failed to load comment file {$f}, using default comment. " );
287                                                         }
288                                                 }
289                                         }
290
291                                         if ( !$commentText ) {
292                                                 $commentText = $comment;
293                                         }
294                                 }
295
296                                 # Import the file
297                                 if ( $this->hasOption( 'dry' ) ) {
298                                         $this->output(
299                                                 " publishing {$file} by '{$wgUser->getName()}', comment '$commentText'... "
300                                         );
301                                 } else {
302                                         $mwProps = new MWFileProps( MimeMagic::singleton() );
303                                         $props = $mwProps->getPropsFromPath( $file, true );
304                                         $flags = 0;
305                                         $publishOptions = [];
306                                         $handler = MediaHandler::getHandler( $props['mime'] );
307                                         if ( $handler ) {
308                                                 $metadata = MediaWiki\quietCall( 'unserialize', $props['metadata'] );
309
310                                                 $publishOptions['headers'] = $handler->getContentHeaders( $metadata );
311                                         } else {
312                                                 $publishOptions['headers'] = [];
313                                         }
314                                         $archive = $image->publish( $file, $flags, $publishOptions );
315                                         if ( !$archive->isGood() ) {
316                                                 $this->output( "failed. (" .
317                                                          $archive->getWikiText( false, false, 'en' ) .
318                                                          ")\n" );
319                                                 $failed++;
320                                                 continue;
321                                         }
322                                 }
323
324                                 $commentText = SpecialUpload::getInitialPageText( $commentText, $license );
325                                 if ( !$this->hasOption( 'summary' ) ) {
326                                         $summary = $commentText;
327                                 }
328
329                                 if ( $this->hasOption( 'dry' ) ) {
330                                         $this->output( "done.\n" );
331                                 } elseif ( $image->recordUpload2(
332                                         $archive->value,
333                                         $summary,
334                                         $commentText,
335                                         $props,
336                                         $timestamp
337                                 ) ) {
338                                         # We're done!
339                                         $this->output( "done.\n" );
340
341                                         $doProtect = false;
342
343                                         $protectLevel = $this->getOption( 'protect' );
344
345                                         if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
346                                                 $doProtect = true;
347                                         }
348                                         if ( $this->hasOption( 'unprotect' ) ) {
349                                                 $protectLevel = '';
350                                                 $doProtect = true;
351                                         }
352
353                                         if ( $doProtect ) {
354                                                 # Protect the file
355                                                 $this->output( "\nWaiting for replica DBs...\n" );
356                                                 // Wait for replica DBs.
357                                                 sleep( 2.0 ); # Why this sleep?
358                                                 wfWaitForSlaves();
359
360                                                 $this->output( "\nSetting image restrictions ... " );
361
362                                                 $cascade = false;
363                                                 $restrictions = [];
364                                                 foreach ( $title->getRestrictionTypes() as $type ) {
365                                                         $restrictions[$type] = $protectLevel;
366                                                 }
367
368                                                 $page = WikiPage::factory( $title );
369                                                 $status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
370                                                 $this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
371                                         }
372                                 } else {
373                                         $this->output( "failed. (at recordUpload stage)\n" );
374                                         $svar = 'failed';
375                                 }
376
377                                 $$svar++;
378                                 $processed++;
379
380                                 if ( $limit && $processed >= $limit ) {
381                                         break;
382                                 }
383                         }
384
385                         # Print out some statistics
386                         $this->output( "\n" );
387                         foreach (
388                                 [
389                                         'count' => 'Found',
390                                         'limit' => 'Limit',
391                                         'ignored' => 'Ignored',
392                                         'added' => 'Added',
393                                         'skipped' => 'Skipped',
394                                         'overwritten' => 'Overwritten',
395                                         'failed' => 'Failed'
396                                 ] as $var => $desc
397                         ) {
398                                 if ( $$var > 0 ) {
399                                         $this->output( "{$desc}: {$$var}\n" );
400                                 }
401                         }
402                 } else {
403                         $this->output( "No suitable files could be found for import.\n" );
404                 }
405         }
406
407         /**
408          * Search a directory for files with one of a set of extensions
409          *
410          * @param string $dir Path to directory to search
411          * @param array $exts Array of extensions to search for
412          * @param bool $recurse Search subdirectories recursively
413          * @return array|bool Array of filenames on success, or false on failure
414          */
415         private function findFiles( $dir, $exts, $recurse = false ) {
416                 if ( is_dir( $dir ) ) {
417                         $dhl = opendir( $dir );
418                         if ( $dhl ) {
419                                 $files = [];
420                                 while ( ( $file = readdir( $dhl ) ) !== false ) {
421                                         if ( is_file( $dir . '/' . $file ) ) {
422                                                 list( /* $name */, $ext ) = $this->splitFilename( $dir . '/' . $file );
423                                                 if ( array_search( strtolower( $ext ), $exts ) !== false ) {
424                                                         $files[] = $dir . '/' . $file;
425                                                 }
426                                         } elseif ( $recurse && is_dir( $dir . '/' . $file ) && $file !== '..' && $file !== '.' ) {
427                                                 $files = array_merge( $files, $this->findFiles( $dir . '/' . $file, $exts, true ) );
428                                         }
429                                 }
430
431                                 return $files;
432                         } else {
433                                 return [];
434                         }
435                 } else {
436                         return [];
437                 }
438         }
439
440         /**
441          * Split a filename into filename and extension
442          *
443          * @param string $filename Filename
444          * @return array
445          */
446         private function splitFilename( $filename ) {
447                 $parts = explode( '.', $filename );
448                 $ext = $parts[count( $parts ) - 1];
449                 unset( $parts[count( $parts ) - 1] );
450                 $fname = implode( '.', $parts );
451
452                 return [ $fname, $ext ];
453         }
454
455         /**
456          * Find an auxilliary file with the given extension, matching
457          * the give base file path. $maxStrip determines how many extensions
458          * may be stripped from the original file name before appending the
459          * new extension. For example, with $maxStrip = 1 (the default),
460          * file files acme.foo.bar.txt and acme.foo.txt would be auxilliary
461          * files for acme.foo.bar and the extension ".txt". With $maxStrip = 2,
462          * acme.txt would also be acceptable.
463          *
464          * @param string $file Base path
465          * @param string $auxExtension The extension to be appended to the base path
466          * @param int $maxStrip The maximum number of extensions to strip from the base path (default: 1)
467          * @return string|bool
468          */
469         private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
470                 if ( strpos( $auxExtension, '.' ) !== 0 ) {
471                         $auxExtension = '.' . $auxExtension;
472                 }
473
474                 $d = dirname( $file );
475                 $n = basename( $file );
476
477                 while ( $maxStrip >= 0 ) {
478                         $f = $d . '/' . $n . $auxExtension;
479
480                         if ( file_exists( $f ) ) {
481                                 return $f;
482                         }
483
484                         $idx = strrpos( $n, '.' );
485                         if ( !$idx ) {
486                                 break;
487                         }
488
489                         $n = substr( $n, 0, $idx );
490                         $maxStrip -= 1;
491                 }
492
493                 return false;
494         }
495
496         # @todo FIXME: Access the api in a saner way and performing just one query
497         # (preferably batching files too).
498         private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
499                 $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
500                         . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=comment';
501                 $body = Http::get( $url, [], __METHOD__ );
502                 if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) == 0 ) {
503                         return false;
504                 }
505
506                 return html_entity_decode( $matches[1] );
507         }
508
509         private function getFileUserFromSourceWiki( $wiki_host, $file ) {
510                 $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
511                         . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=user';
512                 $body = Http::get( $url, [], __METHOD__ );
513                 if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) == 0 ) {
514                         return false;
515                 }
516
517                 return html_entity_decode( $matches[1] );
518         }
519
520 }
521
// Name the class defined above in $maintClass, then include the file named by
// the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file instantiates and runs $maintClass when this
// script is the entry point -- confirm in Maintenance.php.
$maintClass = 'ImportImages';
require_once RUN_MAINTENANCE_IF_MAIN;