]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/Import.php
MediaWiki 1.14.0
[autoinstallsdev/mediawiki.git] / includes / Import.php
1 <?php
2 /**
3  * MediaWiki page data importer
4  * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5  * http://www.mediawiki.org/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  * http://www.gnu.org/copyleft/gpl.html
21  *
22  * @file
23  * @ingroup SpecialPage
24  */
25
/**
 * Holds the data of one imported item (a page revision, a log entry or a
 * file upload) and knows how to store it in the local wiki.
 *
 * The setters are filled in while the import stream is read; one of the
 * import*() methods is then called to write the item to the database.
 *
 * @ingroup SpecialPage
 */
class WikiRevision {
        var $title = null;
        var $id = 0;
        var $timestamp = "20010115000000";
        var $user = 0;
        var $user_text = "";
        var $text = "";
        var $comment = "";
        var $minor = false;
        var $type = "";
        var $action = "";
        var $params = "";

        // Upload-only fields. They are declared (as null) so that the getters
        // never read an undefined property and the setters never have to
        // create a property on the fly.
        var $src = null;
        var $filename = null;
        var $size = null;

        /**
         * Set the title this item belongs to.
         *
         * @param $title Object: the title object (any object is accepted)
         * @throws MWException if $title is null or otherwise not an object
         */
        function setTitle( $title ) {
                if( is_object( $title ) ) {
                        $this->title = $title;
                } elseif( is_null( $title ) ) {
                        throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
                } else {
                        throw new MWException( "WikiRevision given non-object title in import." );
                }
        }

        /**
         * @param $id Mixed: ID of the item as given in the import data
         */
        function setID( $id ) {
                $this->id = $id;
        }

        /**
         * Store the timestamp, converted to TS_MW format by wfTimestamp().
         *
         * @param $ts String: timestamp, e.g. 2003-08-05T18:30:02Z
         */
        function setTimestamp( $ts ) {
                $this->timestamp = wfTimestamp( TS_MW, $ts );
        }

        /**
         * @param $user String: name of the contributor
         */
        function setUsername( $user ) {
                $this->user_text = $user;
        }

        /**
         * Stores the IP in the same field as setUsername() does.
         *
         * @param $ip String: IP address of the contributor
         */
        function setUserIP( $ip ) {
                $this->user_text = $ip;
        }

        /**
         * @param $text String: revision text
         */
        function setText( $text ) {
                $this->text = $text;
        }

        /**
         * @param $text String: edit summary / log comment
         */
        function setComment( $text ) {
                $this->comment = $text;
        }

        /**
         * @param $minor Mixed: cast to bool
         */
        function setMinor( $minor ) {
                $this->minor = (bool)$minor;
        }

        /**
         * @param $src String: location the upload is fetched from by downloadSource()
         */
        function setSrc( $src ) {
                $this->src = $src;
        }

        /**
         * @param $filename String: file name of the upload
         */
        function setFilename( $filename ) {
                $this->filename = $filename;
        }

        /**
         * @param $size Mixed: size of the upload, converted with intval()
         */
        function setSize( $size ) {
                $this->size = intval( $size );
        }

        /**
         * @param $type String: log type (used for the log_type column)
         */
        function setType( $type ) {
                $this->type = $type;
        }

        /**
         * @param $action String: log action (used for the log_action column)
         */
        function setAction( $action ) {
                $this->action = $action;
        }

        /**
         * @param $params String: log parameters (used for the log_params column)
         */
        function setParams( $params ) {
                $this->params = $params;
        }

        /**
         * @return Object or null if no title has been set
         */
        function getTitle() {
                return $this->title;
        }

        function getID() {
                return $this->id;
        }

        /**
         * @return String: timestamp as stored by setTimestamp()
         */
        function getTimestamp() {
                return $this->timestamp;
        }

        /**
         * @return String: contributor name or IP, whichever was set last
         */
        function getUser() {
                return $this->user_text;
        }

        function getText() {
                return $this->text;
        }

        function getComment() {
                return $this->comment;
        }

        /**
         * @return Bool
         */
        function getMinor() {
                return $this->minor;
        }

        /**
         * @return String or null if setSrc() was never called
         */
        function getSrc() {
                return $this->src;
        }

        /**
         * @return String or null if setFilename() was never called
         */
        function getFilename() {
                return $this->filename;
        }

        /**
         * @return Int or null if setSize() was never called
         */
        function getSize() {
                return $this->size;
        }

        function getType() {
                return $this->type;
        }

        function getAction() {
                return $this->action;
        }

        function getParams() {
                return $this->params;
        }

        /**
         * Insert this revision into the history of its page, creating the page
         * if it does not exist yet.
         *
         * For an existing page, the revision is skipped when a row with the
         * same page, timestamp, user text and comment is already present.
         *
         * @return Bool: false if the revision was skipped as a duplicate,
         *         true otherwise
         */
        function importOldRevision() {
                $dbw = wfGetDB( DB_MASTER );

                # Sneak a single revision into place
                $user = User::newFromName( $this->getUser() );
                if( $user ) {
                        $userId = intval( $user->getId() );
                        $userText = $user->getName();
                } else {
                        $userId = 0;
                        $userText = $this->getUser();
                }

                // avoid memory leak...?
                $linkCache = LinkCache::singleton();
                $linkCache->clear();

                $article = new Article( $this->title );
                $pageId = $article->getId();
                $created = ( $pageId == 0 );
                if( $created ) {
                        # must create the page...
                        $pageId = $article->insertOn( $dbw );
                } else {
                        $prior = $dbw->selectField( 'revision', '1',
                                array( 'rev_page' => $pageId,
                                        'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                                        'rev_user_text' => $userText,
                                        'rev_comment'   => $this->getComment() ),
                                __METHOD__
                        );
                        if( $prior ) {
                                // FIXME: this could fail slightly for multiple matches :P
                                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                                        $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                                return false;
                        }
                }

                # FIXME: Use original rev_id optionally (better for backups)
                # Insert the row
                $revision = new Revision( array(
                        'page'       => $pageId,
                        'text'       => $this->getText(),
                        'comment'    => $this->getComment(),
                        'user'       => $userId,
                        'user_text'  => $userText,
                        'timestamp'  => $this->timestamp,
                        'minor_edit' => $this->minor,
                        ) );
                $revId = $revision->insertOn( $dbw );
                $changed = $article->updateIfNewerOn( $dbw, $revision );

                # To be on the safe side, run the updates with $wgTitle pointing
                # at the imported page and restore the old value afterwards.
                $tempTitle = $GLOBALS['wgTitle'];
                $GLOBALS['wgTitle'] = $this->title;

                if( $created ) {
                        wfDebug( __METHOD__ . ": running onArticleCreate\n" );
                        Article::onArticleCreate( $this->title );

                        wfDebug( __METHOD__ . ": running create updates\n" );
                        $article->createUpdates( $revision );

                } elseif( $changed ) {
                        wfDebug( __METHOD__ . ": running onArticleEdit\n" );
                        Article::onArticleEdit( $this->title, 'skiptransclusions' ); // leave templatelinks for editUpdates()

                        wfDebug( __METHOD__ . ": running edit updates\n" );
                        $article->editUpdates(
                                $this->getText(),
                                $this->getComment(),
                                $this->minor,
                                $this->timestamp,
                                $revId );
                }
                $GLOBALS['wgTitle'] = $tempTitle;

                return true;
        }

        /**
         * Insert this item into the logging table unless an entry with the same
         * type, action, timestamp, title, comment and params already exists.
         *
         * @return Bool: true if a row was inserted, false if the item was
         *         skipped (no title set, or already present)
         */
        function importLogItem() {
                $dbw = wfGetDB( DB_MASTER );
                # FIXME: this will not record autoblocks
                if( !$this->getTitle() ) {
                        wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log item, timestamp " .
                                $this->timestamp . "\n" );
                        return false;
                }
                # Check if it exists already
                // FIXME: use original log ID (better for backups)
                $prior = $dbw->selectField( 'logging', '1',
                        array( 'log_type' => $this->getType(),
                                'log_action'    => $this->getAction(),
                                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                                'log_namespace' => $this->getTitle()->getNamespace(),
                                'log_title'     => $this->getTitle()->getDBkey(),
                                'log_comment'   => $this->getComment(),
                                #'log_user_text' => $this->user_text,
                                'log_params'    => $this->params ),
                        __METHOD__
                );
                // FIXME: this could fail slightly for multiple matches :P
                if( $prior ) {
                        wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
                                $this->timestamp . "\n" );
                        return false;
                }
                $log_id = $dbw->nextSequenceValue( 'log_log_id_seq' );
                $data = array(
                        'log_id' => $log_id,
                        'log_type' => $this->type,
                        'log_action' => $this->action,
                        'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                        'log_user' => User::idFromName( $this->user_text ),
                        #'log_user_text' => $this->user_text,
                        'log_namespace' => $this->getTitle()->getNamespace(),
                        'log_title' => $this->getTitle()->getDBkey(),
                        'log_comment' => $this->getComment(),
                        'log_params' => $this->params
                );
                $dbw->insert( 'logging', $data, __METHOD__ );

                // Report success explicitly so callers can tell an inserted
                // item apart from a skipped one.
                return true;
        }

        /**
         * Fetch the file named by getSrc() and store it as an upload for this
         * item's title. The comment is used both as the upload comment and as
         * the initial page text.
         *
         * @return Bool: true if the upload status is good, false otherwise
         */
        function importUpload() {
                wfDebug( __METHOD__ . ": STUB\n" );

                // @fixme upload() uses $wgUser, which is wrong here
                // it may also create a page without our desire, also wrong potentially.
                // and, it will record a *current* upload, but we might want an archive version here

                $file = wfLocalFile( $this->getTitle() );
                if( !$file ) {
                        wfDebug( "IMPORT: Bad file. :(\n" );
                        return false;
                }

                $source = $this->downloadSource();
                if( !$source ) {
                        wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
                        return false;
                }

                $status = $file->upload( $source,
                        $this->getComment(),
                        $this->getComment(), // Initial page, if none present...
                        File::DELETE_SOURCE,
                        false, // props...
                        $this->getTimestamp() );

                if( $status->isGood() ) {
                        // yay?
                        wfDebug( "IMPORT: is ok?\n" );
                        return true;
                }

                wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
                return false;

        }

        /**
         * Download getSrc() into a new temporary file.
         *
         * Nothing is downloaded unless $wgEnableUploads is set. On any failure
         * after the temporary file has been opened, the file is removed again.
         *
         * @return Mixed: path of the temporary file, or false on failure
         */
        function downloadSource() {
                global $wgEnableUploads;
                if( !$wgEnableUploads ) {
                        return false;
                }

                $tempo = tempnam( wfTempDir(), 'download' );
                if( $tempo === false ) {
                        wfDebug( "IMPORT: couldn't create a temp file\n" );
                        return false;
                }
                $f = fopen( $tempo, 'wb' );
                if( !$f ) {
                        wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
                        return false;
                }

                // @fixme!
                $src = $this->getSrc();
                $data = Http::get( $src );
                if( !$data ) {
                        wfDebug( "IMPORT: couldn't fetch source $src\n" );
                        fclose( $f );
                        unlink( $tempo );
                        return false;
                }

                $written = fwrite( $f, $data );
                fclose( $f );
                if( $written !== strlen( $data ) ) {
                        // Do not hand a truncated file to the upload code.
                        wfDebug( "IMPORT: couldn't write all data to temp file $tempo\n" );
                        unlink( $tempo );
                        return false;
                }

                return $tempo;
        }

}
372
373 /**
374  * implements Special:Import
375  * @ingroup SpecialPage
376  */
377 class WikiImporter {
378         var $mDebug = false;
379         var $mSource = null;
380         var $mPageCallback = null;
381         var $mPageOutCallback = null;
382         var $mRevisionCallback = null;
383         var $mLogItemCallback = null;
384         var $mUploadCallback = null;
385         var $mTargetNamespace = null;
386         var $mXmlNamespace = false;
387         var $lastfield;
388         var $tagStack = array();
389
390         function __construct( $source ) {
391                 $this->setRevisionCallback( array( $this, "importRevision" ) );
392                 $this->setUploadCallback( array( $this, "importUpload" ) );
393                 $this->setLogItemCallback( array( $this, "importLogItem" ) );
394                 $this->mSource = $source;
395         }
396
397         function throwXmlError( $err ) {
398                 $this->debug( "FAILURE: $err" );
399                 wfDebug( "WikiImporter XML error: $err\n" );
400         }
401
402         function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
403                  if( preg_match( '/www.mediawiki.org/',$prefix ) ) {
404                         $prefix = str_replace( '/','\/',$prefix );
405                         $this->mXmlNamespace='/^'.$prefix.':/';
406                  }
407         }
408
409         function stripXmlNamespace($name) {
410                 if( $this->mXmlNamespace ) {
411                        return(preg_replace($this->mXmlNamespace,'',$name,1));
412                 }
413                 else {
414                        return($name);
415                 }
416         }
417    
418         # --------------
419
420         function doImport() {
421                 if( empty( $this->mSource ) ) {
422                         return new WikiErrorMsg( "importnotext" );
423                 }
424
425                 $parser = xml_parser_create_ns( "UTF-8" );
426
427                 # case folding violates XML standard, turn it off
428                 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
429
430                 xml_set_object( $parser, $this );
431                 xml_set_element_handler( $parser, "in_start", "" );
432                 xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
433
434                 $offset = 0; // for context extraction on error reporting
435                 do {
436                         $chunk = $this->mSource->readChunk();
437                         if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
438                                 wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
439                                 return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
440                         }
441                         $offset += strlen( $chunk );
442                 } while( $chunk !== false && !$this->mSource->atEnd() );
443                 xml_parser_free( $parser );
444
445                 return true;
446         }
447
448         function debug( $data ) {
449                 if( $this->mDebug ) {
450                         wfDebug( "IMPORT: $data\n" );
451                 }
452         }
453
454         function notice( $data ) {
455                 global $wgCommandLineMode;
456                 if( $wgCommandLineMode ) {
457                         print "$data\n";
458                 } else {
459                         global $wgOut;
460                         $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
461                 }
462         }
463
464         /**
465          * Set debug mode...
466          */
467         function setDebug( $debug ) {
468                 $this->mDebug = $debug;
469         }
470
471         /**
472          * Sets the action to perform as each new page in the stream is reached.
473          * @param $callback callback
474          * @return callback
475          */
476         function setPageCallback( $callback ) {
477                 $previous = $this->mPageCallback;
478                 $this->mPageCallback = $callback;
479                 return $previous;
480         }
481
482         /**
483          * Sets the action to perform as each page in the stream is completed.
484          * Callback accepts the page title (as a Title object), a second object
485          * with the original title form (in case it's been overridden into a
486          * local namespace), and a count of revisions.
487          *
488          * @param $callback callback
489          * @return callback
490          */
491         function setPageOutCallback( $callback ) {
492                 $previous = $this->mPageOutCallback;
493                 $this->mPageOutCallback = $callback;
494                 return $previous;
495         }
496
497         /**
498          * Sets the action to perform as each page revision is reached.
499          * @param $callback callback
500          * @return callback
501          */
502         function setRevisionCallback( $callback ) {
503                 $previous = $this->mRevisionCallback;
504                 $this->mRevisionCallback = $callback;
505                 return $previous;
506         }
507
508         /**
509          * Sets the action to perform as each file upload version is reached.
510          * @param $callback callback
511          * @return callback
512          */
513         function setUploadCallback( $callback ) {
514                 $previous = $this->mUploadCallback;
515                 $this->mUploadCallback = $callback;
516                 return $previous;
517         }
518         
519         /**
520          * Sets the action to perform as each log item reached.
521          * @param $callback callback
522          * @return callback
523          */
524         function setLogItemCallback( $callback ) {
525                 $previous = $this->mLogItemCallback;
526                 $this->mLogItemCallback = $callback;
527                 return $previous;
528         }
529
530         /**
531          * Set a target namespace to override the defaults
532          */
533         function setTargetNamespace( $namespace ) {
534                 if( is_null( $namespace ) ) {
535                         // Don't override namespaces
536                         $this->mTargetNamespace = null;
537                 } elseif( $namespace >= 0 ) {
538                         // FIXME: Check for validity
539                         $this->mTargetNamespace = intval( $namespace );
540                 } else {
541                         return false;
542                 }
543         }
544
545         /**
546          * Default per-revision callback, performs the import.
547          * @param $revision WikiRevision
548          * @private
549          */
550         function importRevision( $revision ) {
551                 $dbw = wfGetDB( DB_MASTER );
552                 return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
553         }
554         
555         /**
556          * Default per-revision callback, performs the import.
557          * @param $revision WikiRevision
558          * @private
559          */
560         function importLogItem( $rev ) {
561                 $dbw = wfGetDB( DB_MASTER );
562                 return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
563         }
564
565         /**
566          * Dummy for now...
567          */
568         function importUpload( $revision ) {
569                 //$dbw = wfGetDB( DB_MASTER );
570                 //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
571                 return false;
572         }
573
574         /**
575          * Alternate per-revision callback, for debugging.
576          * @param $revision WikiRevision
577          * @private
578          */
579         function debugRevisionHandler( &$revision ) {
580                 $this->debug( "Got revision:" );
581                 if( is_object( $revision->title ) ) {
582                         $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
583                 } else {
584                         $this->debug( "-- Title: <invalid>" );
585                 }
586                 $this->debug( "-- User: " . $revision->user_text );
587                 $this->debug( "-- Timestamp: " . $revision->timestamp );
588                 $this->debug( "-- Comment: " . $revision->comment );
589                 $this->debug( "-- Text: " . $revision->text );
590         }
591
592         /**
593          * Notify the callback function when a new <page> is reached.
594          * @param $title Title
595          * @private
596          */
597         function pageCallback( $title ) {
598                 if( is_callable( $this->mPageCallback ) ) {
599                         call_user_func( $this->mPageCallback, $title );
600                 }
601         }
602
603         /**
604          * Notify the callback function when a </page> is closed.
605          * @param $title Title
606          * @param $origTitle Title
607          * @param $revisionCount int
608          * @param $successCount Int: number of revisions for which callback returned true
609          * @private
610          */
611         function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
612                 if( is_callable( $this->mPageOutCallback ) ) {
613                         call_user_func( $this->mPageOutCallback, $title, $origTitle,
614                                 $revisionCount, $successCount );
615                 }
616         }
617
618         # XML parser callbacks from here out -- beware!
619         function donothing( $parser, $x, $y="" ) {
620                 #$this->debug( "donothing" );
621         }
622
623         function in_start( $parser, $name, $attribs ) {
624                 $name = $this->stripXmlNamespace($name);
625                 $this->debug( "in_start $name" );
626                 if( $name != "mediawiki" ) {
627                         return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
628                 }
629                 xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
630         }
631
632         function in_mediawiki( $parser, $name, $attribs ) {
633                 $name = $this->stripXmlNamespace($name);
634                 $this->debug( "in_mediawiki $name" );
635                 if( $name == 'siteinfo' ) {
636                         xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
637                 } elseif( $name == 'page' ) {
638                         $this->push( $name );
639                         $this->workRevisionCount = 0;
640                         $this->workSuccessCount = 0;
641                         $this->uploadCount = 0;
642                         $this->uploadSuccessCount = 0;
643                         xml_set_element_handler( $parser, "in_page", "out_page" );
644                 } elseif( $name == 'logitem' ) {
645                         $this->push( $name );
646                         $this->workRevision = new WikiRevision;
647                         xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
648                 } else {
649                         return $this->throwXMLerror( "Expected <page>, got <$name>" );
650                 }
651         }
652         function out_mediawiki( $parser, $name ) {
653                 $name = $this->stripXmlNamespace($name);
654                 $this->debug( "out_mediawiki $name" );
655                 if( $name != "mediawiki" ) {
656                         return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
657                 }
658                 xml_set_element_handler( $parser, "donothing", "donothing" );
659         }
660
661
662         function in_siteinfo( $parser, $name, $attribs ) {
663                 // no-ops for now
664                 $name = $this->stripXmlNamespace($name);
665                 $this->debug( "in_siteinfo $name" );
666                 switch( $name ) {
667                 case "sitename":
668                 case "base":
669                 case "generator":
670                 case "case":
671                 case "namespaces":
672                 case "namespace":
673                         break;
674                 default:
675                         return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
676                 }
677         }
678
679         function out_siteinfo( $parser, $name ) {
680                 $name = $this->stripXmlNamespace($name);
681                 if( $name == "siteinfo" ) {
682                         xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
683                 }
684         }
685
686
687         function in_page( $parser, $name, $attribs ) {
688                 $name = $this->stripXmlNamespace($name);
689                 $this->debug( "in_page $name" );
690                 switch( $name ) {
691                 case "id":
692                 case "title":
693                 case "restrictions":
694                         $this->appendfield = $name;
695                         $this->appenddata = "";
696                         xml_set_element_handler( $parser, "in_nothing", "out_append" );
697                         xml_set_character_data_handler( $parser, "char_append" );
698                         break;
699                 case "revision":
700                         $this->push( "revision" );
701                         if( is_object( $this->pageTitle ) ) {
702                                 $this->workRevision = new WikiRevision;
703                                 $this->workRevision->setTitle( $this->pageTitle );
704                                 $this->workRevisionCount++;
705                         } else {
706                                 // Skipping items due to invalid page title
707                                 $this->workRevision = null;
708                         }
709                         xml_set_element_handler( $parser, "in_revision", "out_revision" );
710                         break;
711                 case "upload":
712                         $this->push( "upload" );
713                         if( is_object( $this->pageTitle ) ) {
714                                 $this->workRevision = new WikiRevision;
715                                 $this->workRevision->setTitle( $this->pageTitle );
716                                 $this->uploadCount++;
717                         } else {
718                                 // Skipping items due to invalid page title
719                                 $this->workRevision = null;
720                         }
721                         xml_set_element_handler( $parser, "in_upload", "out_upload" );
722                         break;
723                 default:
724                         return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
725                 }
726         }
727
728         function out_page( $parser, $name ) {
729                 $name = $this->stripXmlNamespace($name);
730                 $this->debug( "out_page $name" );
731                 $this->pop();
732                 if( $name != "page" ) {
733                         return $this->throwXMLerror( "Expected </page>, got </$name>" );
734                 }
735                 xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
736
737                 $this->pageOutCallback( $this->pageTitle, $this->origTitle,
738                         $this->workRevisionCount, $this->workSuccessCount );
739
740                 $this->workTitle = null;
741                 $this->workRevision = null;
742                 $this->workRevisionCount = 0;
743                 $this->workSuccessCount = 0;
744                 $this->pageTitle = null;
745                 $this->origTitle = null;
746         }
747
748         function in_nothing( $parser, $name, $attribs ) {
749                 $name = $this->stripXmlNamespace($name);
750                 $this->debug( "in_nothing $name" );
751                 return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
752         }
753
754         function char_append( $parser, $data ) {
755                 $this->debug( "char_append '$data'" );
756                 $this->appenddata .= $data;
757         }
758
/**
 * End-element handler for text-only elements collected via char_append().
 *
 * Checks that the closing tag matches the field being collected, stores
 * the accumulated text in the right place (page title bookkeeping or a
 * setter on the working revision), clears the accumulator and restores
 * the element handlers of the enclosing tag.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being closed
 * @return mixed Result of throwXMLerror() on a mismatched tag, otherwise nothing
 */
function out_append( $parser, $name ) {
        $closing = $this->stripXmlNamespace( $name );
        $this->debug( "out_append $closing" );
        if( $closing != $this->appendfield ) {
                return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$closing>" );
        }

        $field = $this->appendfield;
        $value = $this->appenddata;

        // Fields whose text is handed unchanged to a single setter on the
        // working revision object.
        $plainSetters = array(
                'text' => 'setText',
                'username' => 'setUsername',
                'ip' => 'setUserIP',
                'timestamp' => 'setTimestamp',
                'comment' => 'setComment',
                'type' => 'setType',
                'action' => 'setAction',
                'params' => 'setParams',
                'filename' => 'setFilename',
                'src' => 'setSrc',
        );

        switch( $field ) {
        case "title":
                $this->workTitle = $value;
                $this->origTitle = Title::newFromText( $this->workTitle );
                // Move the page into the target namespace only when one is
                // configured and the original title parsed successfully.
                $retarget = !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle );
                $this->pageTitle = $retarget
                        ? Title::makeTitle( $this->mTargetNamespace, $this->origTitle->getDBkey() )
                        : Title::newFromText( $this->workTitle );

                if( is_null( $this->pageTitle ) ) {
                        // Invalid page title? Ignore the page
                        $this->notice( "Skipping invalid page title '$this->workTitle'" );
                } elseif( $this->pageTitle->getInterwiki() != '' ) {
                        $this->notice( "Skipping interwiki page title '$this->workTitle'" );
                        $this->pageTitle = null;
                } else {
                        $this->pageCallback( $this->workTitle );
                }
                break;
        case "id":
                // Only an <id> directly under <revision> or <logitem> is stored.
                if( ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' )
                        && $this->workRevision
                ) {
                        $this->workRevision->setID( $value );
                }
                break;
        case "logtitle":
                if( $this->workRevision ) {
                        $this->workRevision->setTitle( Title::newFromText( $value ) );
                }
                break;
        case "minor":
                // The element's mere presence marks the revision as minor.
                if( $this->workRevision ) {
                        $this->workRevision->setMinor( true );
                }
                break;
        case "size":
                if( $this->workRevision ) {
                        $this->workRevision->setSize( intval( $value ) );
                }
                break;
        default:
                if( isset( $plainSetters[$field] ) ) {
                        if( $this->workRevision ) {
                                $setter = $plainSetters[$field];
                                $this->workRevision->$setter( $value );
                        }
                } else {
                        $this->debug( "Bad append: {$this->appendfield}" );
                }
        }

        $this->appendfield = "";
        $this->appenddata = "";

        // Give control back to the handlers of whichever tag encloses us.
        $parent = $this->parentTag();
        xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
        xml_set_character_data_handler( $parser, "donothing" );
}
854
/**
 * Start-element handler in effect while inside a <revision>.
 *
 * Text-only children are routed to char_append()/out_append();
 * <contributor> gets its own handler pair; anything else is an error.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being opened
 * @param $attribs array Attributes of the element (unused)
 * @return mixed Result of throwXMLerror() for a disallowed element, otherwise nothing
 */
function in_revision( $parser, $name, $attribs ) {
        $tag = $this->stripXmlNamespace( $name );
        $this->debug( "in_revision $tag" );

        $textFields = array( "id", "timestamp", "comment", "minor", "text" );
        if( in_array( $tag, $textFields ) ) {
                $this->appendfield = $tag;
                xml_set_element_handler( $parser, "in_nothing", "out_append" );
                xml_set_character_data_handler( $parser, "char_append" );
                return;
        }

        if( $tag == "contributor" ) {
                $this->push( "contributor" );
                xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
                return;
        }

        return $this->throwXMLerror( "Element <$tag> not allowed in a <revision>." );
}
876
/**
 * End-element handler in effect while inside a <revision>.
 *
 * Pops the tag stack, returns control to the <page> handlers and passes
 * the working revision (if any) to the revision callback, counting the
 * call as a success when the callback returns a true value.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being closed
 * @return mixed Result of throwXMLerror() on an unexpected tag, otherwise nothing
 */
function out_revision( $parser, $name ) {
        $tag = $this->stripXmlNamespace( $name );
        $this->debug( "out_revision $tag" );
        $this->pop();
        if( $tag != "revision" ) {
                return $this->throwXMLerror( "Expected </revision>, got </$tag>" );
        }
        xml_set_element_handler( $parser, "in_page", "out_page" );

        if( !$this->workRevision ) {
                return;
        }
        $callbackArgs = array( $this->workRevision, $this );
        if( call_user_func_array( $this->mRevisionCallback, $callbackArgs ) ) {
                $this->workSuccessCount++;
        }
}
894         
/**
 * Start-element handler in effect while inside a <logitem>.
 *
 * Text-only children are routed to char_append()/out_append();
 * <contributor> gets its own handler pair; anything else is an error.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being opened
 * @param $attribs array Attributes of the element (unused)
 * @return mixed Result of throwXMLerror() for a disallowed element, otherwise nothing
 */
function in_logitem( $parser, $name, $attribs ) {
        $name = $this->stripXmlNamespace( $name );
        $this->debug( "in_logitem $name" );
        switch( $name ) {
        case "id":
        case "timestamp":
        case "comment":
        case "type":
        case "action":
        case "logtitle":
        case "params":
                $this->appendfield = $name;
                xml_set_element_handler( $parser, "in_nothing", "out_append" );
                xml_set_character_data_handler( $parser, "char_append" );
                break;
        case "contributor":
                $this->push( "contributor" );
                xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
                break;
        default:
                // Name the element we are actually in; the message used to say
                // <revision>, copied from in_revision().
                return $this->throwXMLerror( "Element <$name> not allowed in a <logitem>." );
        }
}
918
/**
 * End-element handler in effect while inside a <logitem>.
 *
 * Pops the tag stack, returns control to the <mediawiki> handlers and
 * passes the working item (if any) to the log item callback, counting
 * the call as a success when the callback returns a true value.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being closed
 * @return mixed Result of throwXMLerror() on an unexpected tag, otherwise nothing
 */
function out_logitem( $parser, $name ) {
        $tag = $this->stripXmlNamespace( $name );
        $this->debug( "out_logitem $tag" );
        $this->pop();
        if( $tag != "logitem" ) {
                return $this->throwXMLerror( "Expected </logitem>, got </$tag>" );
        }
        xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

        if( !$this->workRevision ) {
                return;
        }
        $callbackArgs = array( $this->workRevision, $this );
        if( call_user_func_array( $this->mLogItemCallback, $callbackArgs ) ) {
                $this->workSuccessCount++;
        }
}
936
/**
 * Start-element handler in effect while inside an <upload>.
 *
 * Text-only children are routed to char_append()/out_append();
 * <contributor> gets its own handler pair; anything else is an error.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being opened
 * @param $attribs array Attributes of the element (unused)
 * @return mixed Result of throwXMLerror() for a disallowed element, otherwise nothing
 */
function in_upload( $parser, $name, $attribs ) {
        $tag = $this->stripXmlNamespace( $name );
        $this->debug( "in_upload $tag" );

        $textFields = array( "timestamp", "comment", "text", "filename", "src", "size" );
        if( in_array( $tag, $textFields ) ) {
                $this->appendfield = $tag;
                xml_set_element_handler( $parser, "in_nothing", "out_append" );
                xml_set_character_data_handler( $parser, "char_append" );
                return;
        }

        if( $tag == "contributor" ) {
                $this->push( "contributor" );
                xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
                return;
        }

        return $this->throwXMLerror( "Element <$tag> not allowed in an <upload>." );
}
959
/**
 * End-element handler in effect while inside an <upload>.
 *
 * Pops the tag stack, returns control to the <page> handlers and passes
 * the working revision (if any) to the upload callback, incrementing
 * workUploadSuccessCount when the callback returns a true value.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being closed
 * @return mixed Result of throwXMLerror() on an unexpected tag, otherwise nothing
 */
function out_upload( $parser, $name ) {
        $name = $this->stripXmlNamespace( $name );
        // Label the trace with this handler's own name; it used to print
        // "out_revision", which made upload traces look like revision ones.
        $this->debug( "out_upload $name" );
        $this->pop();
        if( $name != "upload" ) {
                return $this->throwXMLerror( "Expected </upload>, got </$name>" );
        }
        xml_set_element_handler( $parser, "in_page", "out_page" );

        if( $this->workRevision ) {
                $ok = call_user_func_array( $this->mUploadCallback,
                        array( $this->workRevision, $this ) );
                if( $ok ) {
                        $this->workUploadSuccessCount++;
                }
        }
}
977
/**
 * Start-element handler in effect while inside a <contributor>.
 *
 * <username>, <ip> and <id> are collected as text through
 * char_append()/out_append(); any other element is an error.
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being opened
 * @param $attribs array Attributes of the element (unused)
 * @return mixed Result of throwXMLerror() for a disallowed element, otherwise nothing
 */
function in_contributor( $parser, $name, $attribs ) {
        $name = $this->stripXmlNamespace( $name );
        $this->debug( "in_contributor $name" );
        switch( $name ) {
        case "username":
        case "ip":
        case "id":
                $this->appendfield = $name;
                xml_set_element_handler( $parser, "in_nothing", "out_append" );
                xml_set_character_data_handler( $parser, "char_append" );
                break;
        default:
                // Return the error result like every other in_* handler does.
                return $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
        }
}
993
/**
 * End-element handler in effect while inside a <contributor>.
 *
 * Pops the tag stack and restores the handlers of the enclosing tag
 * (whatever parentTag() reports after the pop).
 *
 * @param $parser XML parser the handlers are registered on
 * @param $name string Name of the element being closed
 * @return mixed Result of throwXMLerror() on an unexpected tag, otherwise nothing
 */
function out_contributor( $parser, $name ) {
        $tag = $this->stripXmlNamespace( $name );
        $this->debug( "out_contributor $tag" );
        $this->pop();
        if( $tag != "contributor" ) {
                return $this->throwXMLerror( "Expected </contributor>, got </$tag>" );
        }
        $enclosing = $this->parentTag();
        xml_set_element_handler( $parser, "in_$enclosing", "out_$enclosing" );
}
1004
/**
 * Record that the parser has entered the given tag.
 *
 * @param $name string Tag name to put on top of the tag stack
 */
private function push( $name ) {
        $this->tagStack[] = $name;
        $this->debug( "PUSH $name" );
}
1009
/**
 * Remove the innermost tag from the tag stack.
 *
 * @return mixed The removed tag name, as returned by array_pop()
 */
private function pop() {
        $removed = array_pop( $this->tagStack );
        $this->debug( "POP $removed" );
        return $removed;
}
1015
/**
 * Report the innermost tag currently on the tag stack without removing it.
 *
 * @return mixed Name on top of the stack, or null when the stack is empty
 */
private function parentTag() {
        $depth = count( $this->tagStack );
        // An empty stack has no parent; reading index -1 would raise an
        // undefined-offset notice before yielding the same null.
        $name = $depth > 0 ? $this->tagStack[$depth - 1] : null;
        $this->debug( "PARENT $name" );
        return $name;
}
1021
1022 }
1023
/**
 * Import source that serves an in-memory string as one single chunk.
 * @ingroup SpecialPage
 */
class ImportStringSource {
        /** Complete text handed out by readChunk() */
        var $mString;

        /** True once readChunk() has handed the string out */
        var $mRead = false;

        /**
         * @param $string string Text that readChunk() will return
         */
        function __construct( $string ) {
                $this->mString = $string;
                $this->mRead = false;
        }

        /**
         * @return bool True once the string has been read
         */
        function atEnd() {
                return $this->mRead;
        }

        /**
         * Hand out the whole string on the first call.
         *
         * @return mixed The stored string the first time, false afterwards
         */
        function readChunk() {
                if( $this->atEnd() ) {
                        return false;
                }
                $this->mRead = true;
                return $this->mString;
        }
}
1047
/**
 * Import source that reads from an open stream handle in chunks.
 *
 * The static factories build an instance from a local file, an HTTP file
 * upload, a URL or an interwiki Special:Export link; on failure they
 * return a WikiErrorMsg instead of an ImportStreamSource.
 * @ingroup SpecialPage
 */
class ImportStreamSource {
        /** Stream resource the chunks are read from */
        var $mHandle;

        /**
         * @param $handle resource Open, readable stream
         */
        function __construct( $handle ) {
                $this->mHandle = $handle;
        }

        /**
         * @return bool Result of feof() on the underlying handle
         */
        function atEnd() {
                return feof( $this->mHandle );
        }

        /**
         * @return mixed Up to 32768 bytes from the stream, as returned by fread()
         */
        function readChunk() {
                return fread( $this->mHandle, 32768 );
        }

        /**
         * Open a local file for import.
         *
         * @param $filename string Path of the file to read
         * @return mixed ImportStreamSource, or WikiErrorMsg if the file cannot be opened
         */
        static function newFromFile( $filename ) {
                // Warnings are suppressed; failure is reported via the return value.
                $file = @fopen( $filename, 'rt' );
                if( !$file ) {
                        return new WikiErrorMsg( "importcantopen" );
                }
                return new ImportStreamSource( $file );
        }

        /**
         * Build a source from a file uploaded in the current request.
         *
         * @param $fieldname string Name of the upload form field
         * @return mixed ImportStreamSource, or WikiErrorMsg describing the upload problem
         */
        static function newFromUpload( $fieldname = "xmlimport" ) {
                // Inspect $_FILES directly: taking a reference to a missing key
                // would create a null entry in the superglobal.
                if( empty( $_FILES[$fieldname]['name'] ) ) {
                        return new WikiErrorMsg( 'importnofile' );
                }
                $upload = $_FILES[$fieldname];

                if( !empty( $upload['error'] ) ) {
                        switch( $upload['error'] ) {
                                case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
                                        return new WikiErrorMsg( 'importuploaderrorsize' );
                                case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
                                        return new WikiErrorMsg( 'importuploaderrorsize' );
                                case 3: # The uploaded file was only partially uploaded
                                        return new WikiErrorMsg( 'importuploaderrorpartial' );
                                case 6: # Missing a temporary folder. Introduced in PHP 4.3.10 and PHP 5.0.3.
                                        return new WikiErrorMsg( 'importuploaderrortemp' );
                                # Other codes fall through to the is_uploaded_file() check below.
                        }
                }

                $fname = $upload['tmp_name'];
                if( is_uploaded_file( $fname ) ) {
                        return ImportStreamSource::newFromFile( $fname );
                }
                return new WikiErrorMsg( 'importnofile' );
        }

        /**
         * Fetch a URL and expose the response body as an import source.
         *
         * @param $url string Address to fetch
         * @param $method string HTTP method passed to Http::request()
         * @return mixed ImportStreamSource over a temporary file holding the
         *   response, or WikiErrorMsg if the request or the temp file fails
         */
        static function newFromURL( $url, $method = 'GET' ) {
                wfDebug( __METHOD__ . ": opening $url\n" );
                # Use the standard HTTP fetch function; it times out
                # quicker and sorts out user-agent problems which might
                # otherwise prevent importing from large sites, such
                # as the Wikimedia cluster, etc.
                $data = Http::request( $method, $url );
                if( $data === false ) {
                        return new WikiErrorMsg( 'importcantopen' );
                }

                $file = tmpfile();
                if( $file === false ) {
                        // tmpfile() returns false when no temporary file can be
                        // created; fwrite()/fseek() on false would only emit warnings.
                        return new WikiErrorMsg( 'importcantopen' );
                }
                fwrite( $file, $data );
                fflush( $file );
                fseek( $file, 0 );
                return new ImportStreamSource( $file );
        }

        /**
         * Build a source from another wiki's Special:Export page.
         *
         * @param $interwiki string Interwiki prefix of the source wiki
         * @param $page string Title of the page to export
         * @param $history bool Whether to request the full history (history=1)
         * @return mixed ImportStreamSource, or WikiErrorMsg on a bad page or prefix
         */
        public static function newFromInterwiki( $interwiki, $page, $history=false ) {
                if( $page == '' ) {
                        return new WikiErrorMsg( 'import-noarticle' );
                }
                $link = Title::newFromText( "$interwiki:Special:Export/$page" );
                if( is_null( $link ) || $link->getInterwiki() == '' ) {
                        return new WikiErrorMsg( 'importbadinterwiki' );
                }
                $params = $history ? 'history=1' : '';
                $url = $link->getFullUrl( $params );
                # For interwikis, use POST to avoid redirects.
                return ImportStreamSource::newFromURL( $url, "POST" );
        }
}