]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - maintenance/importImages.php
MediaWiki 1.17.1-scripts
[autoinstalls/mediawiki.git] / maintenance / importImages.php
1 <?php
2
3 /**
4  * Maintenance script to import one or more images from the local file system into
5  * the wiki without using the web-based interface.
6  *
7  * "Smart import" additions:
8  * - aim: preserve the essential metadata (user, description) when importing media files from an existing wiki
9  * - process:
10  *      - interface with the source wiki, don't use bare files only (see --source-wiki-url).
11  *      - fetch metadata from source wiki for each file to import.
12  *      - commit the fetched metadata to the destination wiki while submitting.
13  *
14  * @file
15  * @ingroup Maintenance
16  * @author Rob Church <robchur@gmail.com>
17  * @author Mij <mij@bitchx.it>
18  */
19
# Options that consume the following command-line token as their value.
$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
require_once( dirname( __FILE__ ) . '/commandLine.inc' );
require_once( dirname( __FILE__ ) . '/importImages.inc' );

# Counters used for the closing statistics. They are addressed by name through
# $svar / $var further down, so the variable names must match those strings.
$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

echo( "Import Images\n\n" );

# Need a path
# ($args and $options are presumably populated by commandLine.inc -- they are
# not assigned anywhere in this script.)
if ( count( $args ) > 0 ) {

    $dir = $args[0];

    # Check Protection
    if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) {
        die( "Cannot specify both protect and unprotect.  Only 1 is allowed.\n" );
    }

    # A value of 1 means --protect was passed as a bare flag, without a level.
    if ( isset( $options['protect'] ) && $options['protect'] == 1 ) {
        die( "You must specify a protection option.\n" );
    }

    # Prepare the list of allowed extensions
    global $wgFileExtensions;
    $extensions = isset( $options['extensions'] )
        ? explode( ',', strtolower( $options['extensions'] ) )
        : $wgFileExtensions;

    # Search the path provided for candidates for import
    $files = findFiles( $dir, $extensions );

    # Initialise the user for this operation; fall back to the default
    # account when the requested name does not yield a User object.
    $user = isset( $options['user'] )
        ? User::newFromName( $options['user'] )
        : User::newFromName( 'Maintenance script' );
    if ( !( $user instanceof User ) ) {
        $user = User::newFromName( 'Maintenance script' );
    }
    $wgUser = $user;

    # Get block check. If a value is given, it specifies how often (every N
    # processed files) the check is performed; a bare flag means every file.
    if ( isset( $options['check-userblock'] ) ) {
        if ( !$options['check-userblock'] ) {
            $checkUserBlock = 1;
        } else {
            $checkUserBlock = (int)$options['check-userblock'];
        }
    } else {
        $checkUserBlock = false;
    }

    # Get --from
    $from = isset( $options['from'] ) ? $options['from'] : null;

    # Get sleep time.
    $sleep = isset( $options['sleep'] ) ? $options['sleep'] : null;
    if ( $sleep ) {
        $sleep = (int)$sleep;
    }

    # Get limit number
    $limit = isset( $options['limit'] ) ? $options['limit'] : null;
    if ( $limit ) {
        $limit = (int)$limit;
    }

    # Get the upload comment. Provide a default one in case there's no comment given.
    # --comment-file takes precedence over --comment.
    $comment = 'Importing image file';

    if ( isset( $options['comment-file'] ) ) {
        $comment = file_get_contents( $options['comment-file'] );
        if ( $comment === false || $comment === null ) {
            die( "failed to read comment file: {$options['comment-file']}\n" );
        }
    } elseif ( isset( $options['comment'] ) ) {
        $comment = $options['comment'];
    }

    $commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;

    # Get the license specifier
    $license = isset( $options['license'] ) ? $options['license'] : '';

    # Batch "upload" operation
    if ( ( $count = count( $files ) ) > 0 ) {

        foreach ( $files as $file ) {
            $base = wfBaseName( $file );

            # Validate a title
            $title = Title::makeTitleSafe( NS_FILE, $base );
            if ( !is_object( $title ) ) {
                echo( "{$base} could not be imported; a valid title cannot be produced\n" );
                continue;
            }

            # Honour --from: ignore every file until the requested DB key is
            # reached, then clear $from so all later files are processed.
            # Strict comparison avoids numeric-string coincidences.
            if ( $from ) {
                if ( $from === $title->getDBkey() ) {
                    $from = null;
                } else {
                    $ignored++;
                    continue;
                }
            }

            if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
                $user->clearInstanceCache( 'name' ); // reload from DB!
                if ( $user->isBlocked() ) {
                    echo( $user->getName() . " was blocked! Aborting.\n" );
                    break;
                }
            }

            # Check existence. $svar names the counter to bump once this
            # file has been handled.
            $image = wfLocalFile( $title );
            if ( $image->exists() ) {
                if ( isset( $options['overwrite'] ) ) {
                    echo( "{$base} exists, overwriting..." );
                    $svar = 'overwritten';
                } else {
                    echo( "{$base} exists, skipping\n" );
                    $skipped++;
                    continue;
                }
            } else {
                if ( isset( $options['skip-dupes'] ) ) {
                    $repo = $image->getRepo();
                    $sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.

                    $dupes = $repo->findBySha1( $sha1 );

                    if ( $dupes ) {
                        echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" );
                        $skipped++;
                        continue;
                    }
                }

                echo( "Importing {$base}..." );
                $svar = 'added';
            }

            if ( isset( $options['source-wiki-url'] ) ) {
                /* find comment text directly from source wiki, through MW's API */
                $realComment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
                $commentText = ( $realComment === false ) ? $comment : $realComment;

                /* find user directly from source wiki, through MW's API */
                $realUser = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
                if ( $realUser === false ) {
                    $wgUser = $user;
                } else {
                    # Resolve into a temporary first so that $wgUser is never
                    # left holding a non-object when the lookup fails.
                    # NOTE(review): User::newFromName() appears to fail only for
                    # invalid names, not for accounts missing locally -- confirm
                    # whether an explicit existence check is wanted here.
                    $sourceUser = User::newFromName( $realUser );
                    if ( !( $sourceUser instanceof User ) ) {
                        echo( "failed: user '$realUser' does not exist in target wiki.\n" );
                        $failed++;
                        continue;
                    }
                    $wgUser = $sourceUser;
                }
            } else {
                # Find comment text
                $commentText = false;

                if ( $commentExt ) {
                    $f = findAuxFile( $file, $commentExt );
                    if ( !$f ) {
                        echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
                    } else {
                        $commentText = file_get_contents( $f );
                        # Test the read result, not the path, to detect failure.
                        if ( $commentText === false ) {
                            echo( " Failed to load comment file {$f}, using default comment. " );
                        }
                    }
                }

                # Missing, unreadable or empty per-file comment: use the default.
                if ( !$commentText ) {
                    $commentText = $comment;
                }
            }

            # Import the file
            if ( isset( $options['dry'] ) ) {
                echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
            } else {
                $archive = $image->publish( $file );
                if ( !$archive->isGood() ) {
                    echo( "failed.\n" );
                    $failed++;
                    continue;
                }
            }

            $doProtect = false;
            $restrictions = array();

            global $wgRestrictionLevels;

            $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

            if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
                $restrictions['move'] = $protectLevel;
                $restrictions['edit'] = $protectLevel;
                $doProtect = true;
            }
            if ( isset( $options['unprotect'] ) ) {
                $restrictions['move'] = '';
                $restrictions['edit'] = '';
                $doProtect = true;
            }

            # $archive is only set on a real run; the dry-run branch is tested
            # first so recordUpload() is never reached without it.
            if ( isset( $options['dry'] ) ) {
                echo( "done.\n" );
            } elseif ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
                # We're done!
                echo( "done.\n" );
                if ( $doProtect ) {
                    # Protect the file
                    $article = new Article( $title );
                    echo "\nWaiting for slaves...\n";
                    // Wait for slaves.
                    sleep( 2 );
                    wfWaitForSlaves( 1.0 );

                    echo( "\nSetting image restrictions ... " );
                    if ( $article->updateRestrictions( $restrictions ) ) {
                        echo( "done.\n" );
                    } else {
                        echo( "failed.\n" );
                    }
                }
            } else {
                echo( "failed.\n" );
                $svar = 'failed';
            }

            # Bump whichever counter $svar names (added/overwritten/failed).
            ${$svar}++;
            $processed++;

            if ( $limit && $processed >= $limit ) {
                break;
            }

            if ( $sleep ) {
                sleep( $sleep );
            }
        }

        # Print out some statistics; only non-zero values are shown.
        echo( "\n" );
        $statLabels = array(
            'count' => 'Found',
            'limit' => 'Limit',
            'ignored' => 'Ignored',
            'added' => 'Added',
            'skipped' => 'Skipped',
            'overwritten' => 'Overwritten',
            'failed' => 'Failed',
        );
        foreach ( $statLabels as $var => $desc ) {
            if ( ${$var} > 0 ) {
                echo( "{$desc}: " . ${$var} . "\n" );
            }
        }

    } else {
        echo( "No suitable files could be found for import.\n" );
    }

} else {
    showUsage();
}

exit( 0 );
278
/**
 * Print the command-line help for this script and terminate.
 *
 * This function never returns: it always ends the process with exit status 1.
 *
 * @param $reason Mixed: optional message printed on its own line before the
 *                usage text; nothing extra is printed when it is falsy.
 */
function showUsage( $reason = false ) {
    if ( $reason ) {
        echo( $reason . "\n" );
    }

    echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts>     Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
--overwrite             Overwrite existing images with the same name (default is to skip them)
--limit=<num>           Limit the number of images to process. Ignored or skipped images are not counted.
--from=<name>           Ignore all files until the one with the given name. Useful for resuming
                        aborted imports. <name> should be the file's canonical database form.
--skip-dupes            Skip images that were already uploaded under a different name (check SHA1)
--sleep=<sec>           Sleep between files. Useful mostly for debugging.
--user=<username>       Set username of uploader, default 'Maintenance script'
--check-userblock       Check if the user got blocked during import.
--comment=<text>        Set upload summary comment, default 'Importing image file'.
--comment-file=<file>   Set upload summary comment to the content of <file>.
--comment-ext=<ext>     Causes the comment for each file to be loaded from a file with the same name
                        but the extension <ext>. If no such file is found, the global comment is used.
--license=<code>        Use an optional license template
--dry                   Dry run, don't import anything
--protect=<protect>     Specify the protect value (autoconfirmed,sysop)
--unprotect             Unprotects all uploaded images
--source-wiki-url=<url> If specified, take User and Comment data for each imported file from this URL.
                        For example, --source-wiki-url="http://en.wikipedia.org/"

TEXT;
    exit( 1 );
}