]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/import/WikiRevision.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / includes / import / WikiRevision.php
1 <?php
2 /**
3  * MediaWiki page data importer.
4  *
5  * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
6  * https://www.mediawiki.org/
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  * http://www.gnu.org/copyleft/gpl.html
22  *
23  * @file
24  * @ingroup SpecialPage
25  */
26
/**
 * Represents a revision, log entry or upload during the import process.
 * This class sticks closely to the structure of the XML dump.
 *
 * An instance is a plain data holder that is populated through the set*()
 * methods and then committed with exactly one of importOldRevision(),
 * importLogItem() or importUpload(), depending on what it represents.
 *
 * @since 1.2
 *
 * @ingroup SpecialPage
 */
class WikiRevision {

	/**
	 * @since 1.17
	 * @deprecated in 1.29. Unused.
	 * @note Introduced in 9b3128eb2b654761f21fd4ca1d5a1a4b796dc912, unused there, unused now.
	 */
	public $importer = null;

	/**
	 * Page the revision, log entry or upload belongs to.
	 *
	 * @since 1.2
	 * @var Title
	 */
	public $title = null;

	/**
	 * Identifier stored by setID(); the import methods of this class do not read it.
	 *
	 * @since 1.6.4
	 * @var int
	 */
	public $id = 0;

	/**
	 * Timestamp in TS_MW format (setTimestamp() normalizes its input to that format).
	 *
	 * @since 1.2
	 * @var string
	 */
	public $timestamp = "20010115000000";

	/**
	 * @since 1.2
	 * @var int
	 * @deprecated in 1.29. Unused.
	 * @note Introduced in 436a028086fb3f01c4605c5ad2964d56f9306aca, unused there, unused now.
	 */
	public $user = 0;

	/**
	 * User name or IP address of the author, as set by setUsername() or setUserIP().
	 *
	 * @since 1.2
	 * @var string
	 */
	public $user_text = "";

	/**
	 * Pre-resolved author; when set it takes precedence over $user_text.
	 *
	 * @since 1.27
	 * @var User
	 */
	public $userObj = null;

	/**
	 * Content model ID; null until set or lazily derived from the title by getModel().
	 *
	 * @since 1.21
	 * @var string
	 */
	public $model = null;

	/**
	 * Serialization format; null until set or lazily derived by getFormat().
	 *
	 * @since 1.21
	 * @var string
	 */
	public $format = null;

	/**
	 * Serialized revision text.
	 *
	 * @since 1.2
	 * @var string
	 */
	public $text = "";

	/**
	 * Size as given to setSize(), coerced to an integer.
	 *
	 * @since 1.12.2
	 * @var int
	 */
	protected $size;

	/**
	 * Content object; lazily unserialized from $text by getContent().
	 *
	 * @since 1.21
	 * @var Content
	 */
	public $content = null;

	/**
	 * Handler for the content model; lazily resolved by getContentHandler().
	 *
	 * @since 1.24
	 * @var ContentHandler
	 */
	protected $contentHandler = null;

	/**
	 * Edit summary, log comment or upload comment.
	 *
	 * @since 1.2.6
	 * @var string
	 */
	public $comment = "";

	/**
	 * Whether the revision is a minor edit.
	 *
	 * @since 1.5.7
	 * @var bool
	 */
	public $minor = false;

	/**
	 * Log type (written to log_type by importLogItem()).
	 *
	 * @since 1.12.2
	 * @var string
	 */
	public $type = "";

	/**
	 * Log action (written to log_action by importLogItem()).
	 *
	 * @since 1.12.2
	 * @var string
	 */
	public $action = "";

	/**
	 * Log parameters (written as-is to log_params by importLogItem()).
	 *
	 * @since 1.12.2
	 * @var string
	 */
	public $params = "";

	/**
	 * Local path of the file to upload; empty when the file has to be downloaded.
	 *
	 * @since 1.17
	 * @var string
	 */
	public $fileSrc = '';

	/**
	 * Base-36 SHA-1 of the content or file, or false when unknown.
	 *
	 * @since 1.17
	 * @var bool|string
	 */
	public $sha1base36 = false;

	/**
	 * Archive name for old file versions; empty for the current version.
	 *
	 * @since 1.17
	 * @var string
	 */
	public $archiveName = '';

	/**
	 * File name as given to setFilename(); not read by the import methods of this class.
	 *
	 * @since 1.12.2
	 * @var string
	 */
	protected $filename;

	/**
	 * Remote source handed to Http::get() by downloadSource().
	 *
	 * @since 1.12.2
	 * @var mixed
	 */
	protected $src;

	/**
	 * Whether $fileSrc is a temporary file. Written by setFileSrc() and
	 * read by isTempSrc().
	 *
	 * @since 1.18
	 * @var bool
	 */
	public $isTemp = false;

	/**
	 * @since 1.18
	 * @deprecated 1.29 use WikiRevision::isTempSrc()
	 * First written to in 43d5d3b682cc1733ad01a837d11af4a402d57e6a
	 * Actually introduced in 52cd34acf590e5be946b7885ffdc13a157c1c6cf
	 */
	public $fileIsTemp;

	/** @var bool When true, importOldRevision() skips WikiPage::doEditUpdates() */
	private $mNoUpdates = false;

	/** @var Config $config Site configuration; downloadSource() reads 'EnableUploads' from it */
	private $config;

	/**
	 * @param Config $config Configuration object consulted when downloading upload sources
	 */
	public function __construct( Config $config ) {
		$this->config = $config;
	}

	/**
	 * Set the page this item belongs to.
	 *
	 * @since 1.7 taking a Title object (string before)
	 * @param Title $title
	 * @throws MWException If $title is null or any other non-object value
	 */
	public function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
			return;
		}

		if ( $title === null ) {
			throw new MWException( "WikiRevision given a null title in import. "
				. "You may need to adjust \$wgLegalTitleChars." );
		}

		throw new MWException( "WikiRevision given non-object title in import." );
	}

	/**
	 * @since 1.6.4
	 * @param int $id
	 */
	public function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted to TS_MW format.
	 *
	 * @since 1.2
	 * @param string $ts Any format wfTimestamp() accepts, e.g. 2003-08-05T18:30:02Z
	 */
	public function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * Set the author by name. Shares storage with setUserIP().
	 *
	 * @since 1.2
	 * @param string $user
	 */
	public function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Set an already-resolved author; it takes precedence over the user name
	 * in all import methods.
	 *
	 * @since 1.27
	 * @param User $user
	 */
	public function setUserObj( $user ) {
		$this->userObj = $user;
	}

	/**
	 * Set the author by IP address. Shares storage with setUsername().
	 *
	 * @since 1.2
	 * @param string $ip
	 */
	public function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @since 1.21
	 * @param string $model Content model ID
	 */
	public function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @since 1.21
	 * @param string $format Serialization format
	 */
	public function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @since 1.2
	 * @param string $text Serialized revision text
	 */
	public function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @since 1.2.6
	 * @param string $text Edit summary, log comment or upload comment
	 */
	public function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @since 1.5.7
	 * @param bool $minor Coerced to a boolean
	 */
	public function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @since 1.12.2
	 * @param mixed $src Remote source fetched by downloadSource()
	 */
	public function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * Set the local source file for an upload.
	 *
	 * @since 1.17
	 * @param string $src Local path
	 * @param bool $isTemp Whether the file is temporary (importUpload() then
	 *  marks it for automatic cleanup)
	 */
	public function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		// Both flags are kept in sync; $fileIsTemp is the deprecated alias.
		$this->isTemp = $isTemp;
		$this->fileIsTemp = $isTemp;
	}

	/**
	 * @since 1.17
	 * @param string $sha1base36 SHA-1 in base 36
	 */
	public function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @since 1.12.2
	 * @param string $filename
	 */
	public function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @since 1.17
	 * @param string $archiveName
	 */
	public function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @since 1.12.2
	 * @param int $size Coerced with intval()
	 */
	public function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @since 1.12.2
	 * @param string $type Log type
	 */
	public function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @since 1.12.2
	 * @param string $action Log action
	 */
	public function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @since 1.12.2
	 * @param string $params Log parameters; stored unchanged in log_params
	 */
	public function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @since 1.18
	 * @param bool $noupdates True to make importOldRevision() skip edit updates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @since 1.2
	 * @return Title
	 */
	public function getTitle() {
		return $this->title;
	}

	/**
	 * @since 1.6.4
	 * @return int
	 */
	public function getID() {
		return $this->id;
	}

	/**
	 * @since 1.2
	 * @return string Timestamp in TS_MW format
	 */
	public function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @since 1.2
	 * @return string User name or IP address (not a User object, see getUserObj())
	 */
	public function getUser() {
		return $this->user_text;
	}

	/**
	 * @since 1.27
	 * @return User|null Null unless setUserObj() was called
	 */
	public function getUserObj() {
		return $this->userObj;
	}

	/**
	 * @since 1.2
	 * @return string
	 */
	public function getText() {
		return $this->text;
	}

	/**
	 * Get the handler for this item's content model, resolving and caching
	 * it on first use.
	 *
	 * @since 1.24
	 * @return ContentHandler
	 */
	public function getContentHandler() {
		if ( $this->contentHandler === null ) {
			$this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
		}

		return $this->contentHandler;
	}

	/**
	 * Get the content object, unserializing it from the text on first use.
	 *
	 * @since 1.21
	 * @return Content
	 */
	public function getContent() {
		if ( $this->content === null ) {
			$this->content = $this->getContentHandler()
				->unserializeContent( $this->text, $this->getFormat() );
		}

		return $this->content;
	}

	/**
	 * Get the content model, falling back to (and caching) the title's
	 * content model when none was set explicitly.
	 *
	 * @since 1.21
	 * @return string
	 */
	public function getModel() {
		if ( $this->model === null ) {
			$this->model = $this->getTitle()->getContentModel();
		}

		return $this->model;
	}

	/**
	 * Get the serialization format, falling back to (and caching) the
	 * content handler's default format when none was set explicitly.
	 *
	 * @since 1.21
	 * @return string
	 */
	public function getFormat() {
		if ( $this->format === null ) {
			$this->format = $this->getContentHandler()->getDefaultFormat();
		}

		return $this->format;
	}

	/**
	 * @since 1.2.6
	 * @return string
	 */
	public function getComment() {
		return $this->comment;
	}

	/**
	 * @since 1.5.7
	 * @return bool
	 */
	public function getMinor() {
		return $this->minor;
	}

	/**
	 * @since 1.12.2
	 * @return mixed
	 */
	public function getSrc() {
		return $this->src;
	}

	/**
	 * Get the SHA-1 converted from base 36 to base 16.
	 *
	 * @since 1.17
	 * @return bool|string Hexadecimal hash, or false when no hash is set
	 */
	public function getSha1() {
		if ( !$this->sha1base36 ) {
			return false;
		}

		return Wikimedia\base_convert( $this->sha1base36, 36, 16 );
	}

	/**
	 * @since 1.17
	 * @return string
	 */
	public function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * @since 1.17
	 * @return bool Whether the file source was flagged as temporary
	 */
	public function isTempSrc() {
		return $this->isTemp;
	}

	/**
	 * @since 1.12.2
	 * @return mixed
	 */
	public function getFilename() {
		return $this->filename;
	}

	/**
	 * @since 1.17
	 * @return string
	 */
	public function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @since 1.12.2
	 * @return mixed Integer once setSize() was called, null before
	 */
	public function getSize() {
		return $this->size;
	}

	/**
	 * @since 1.12.2
	 * @return string
	 */
	public function getType() {
		return $this->type;
	}

	/**
	 * @since 1.12.2
	 * @return string
	 */
	public function getAction() {
		return $this->action;
	}

	/**
	 * @since 1.12.2
	 * @return string
	 */
	public function getParams() {
		return $this->params;
	}

	/**
	 * Insert this item as a revision of its page, creating the page if needed.
	 *
	 * A revision of the same page with the same timestamp and SHA-1 is treated
	 * as already imported and skipped.
	 *
	 * @since 1.4.1
	 * @return bool True if a revision row was inserted, false if it was skipped
	 */
	public function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = $this->getUserObj() ?: User::newFromName( $this->getUser() );
		if ( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			// Name could not be turned into a User: keep the raw name for the
			// revision row and use a blank User object for the edit updates.
			$userId = 0;
			$userText = $this->getUser();
			$user = new User;
		}

		// avoid memory leak...?
		Title::clearCaches();

		$page = WikiPage::factory( $this->title );
		$page->loadPageData( 'fromdbmaster' );
		if ( !$page->exists() ) {
			// must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
		} else {
			$pageId = $page->getId();
			$created = false;

			// Note: sha1 has been in XML dumps since 2012. If you have an
			// older dump, the duplicate detection here won't work.
			$prior = $dbw->selectField( 'revision', '1',
				[ 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_sha1' => $this->sha1base36 ],
				__METHOD__
			);
			if ( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		if ( !$pageId ) {
			// This seems to happen if two clients simultaneously try to import the
			// same page
			wfDebug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
				$this->title->getPrefixedText() . ']], timestamp ' . $this->timestamp . "\n" );
			return false;
		}

		// Select previous version to make size diffs correct
		// @todo This assumes that multiple revisions of the same page are imported
		// in order from oldest to newest.
		$prevId = $dbw->selectField( 'revision', 'rev_id',
			[
				'rev_page' => $pageId,
				'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $this->timestamp ) ),
			],
			__METHOD__,
			[ 'ORDER BY' => [
					'rev_timestamp DESC',
					'rev_id DESC', // timestamp is not unique per page
				]
			]
		);

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( [
			'title' => $this->title,
			'page' => $pageId,
			'content_model' => $this->getModel(),
			'content_format' => $this->getFormat(),
			// XXX: just set 'content' => $this->getContent()?
			'text' => $this->getContent()->serialize( $this->getFormat() ),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
			'parent_id' => $prevId,
			] );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			// countable/oldcountable stuff is handled in WikiImporter::finishImportPage
			$page->doEditUpdates(
				$revision,
				$user,
				[ 'created' => $created, 'oldcountable' => 'no-change' ]
			);
		}

		return true;
	}

	/**
	 * Insert this item as a row of the logging table.
	 *
	 * An existing row with the same type, action, timestamp, target and
	 * parameters is treated as already imported and skipped.
	 *
	 * @since 1.12.2
	 * @return bool True if a log row was inserted, false if it was skipped
	 */
	public function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );

		$user = $this->getUserObj() ?: User::newFromName( $this->getUser() );
		if ( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			$userId = 0;
			$userText = $this->getUser();
		}

		# @todo FIXME: This will not record autoblocks
		if ( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			[ 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				# 'log_user_text' => $this->user_text,
				'log_params' => $this->params ],
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if ( $prior ) {
			wfDebug( __METHOD__
				. ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp "
				. $this->timestamp . "\n" );
			return false;
		}
		$data = [
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => $userId,
			'log_user_text' => $userText,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_params' => $this->params
		] + CommentStore::newKey( 'log_comment' )->insert( $dbw, $this->getComment() );
		$dbw->insert( 'logging', $data, __METHOD__ );

		return true;
	}

	/**
	 * Import this item as a file upload.
	 *
	 * The file is taken from the local file source if one was set, otherwise it
	 * is downloaded via downloadSource(). If a SHA-1 is known, the file is
	 * verified against it before being stored.
	 *
	 * @since 1.12.2
	 * @return bool True if the upload succeeded, false otherwise
	 */
	public function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . ": Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			$file->load( File::READ_LATEST );
			wfDebug( __METHOD__ . ': Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				// The existing file is newer than the imported one, so the
				// imported one goes into the archive instead.
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . ": File already exists; importing as $archiveName\n" );
			}
		}
		if ( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$autoDeleteSource = $this->isTempSrc();
		if ( !strlen( $source ) ) {
			$source = $this->downloadSource();
			$autoDeleteSource = true;
		}
		if ( !strlen( $source ) ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}

		// Wrapping the path lets temporary sources be cleaned up automatically
		// once $tmpFile goes out of scope, on every return path below.
		$tmpFile = new TempFSFile( $source );
		if ( $autoDeleteSource ) {
			$tmpFile->autocollect();
		}

		// getSha1() yields the hash without leading zeros, so strip them from
		// the computed hash as well before comparing.
		$sha1File = ltrim( sha1_file( $source ), '0' );
		$sha1 = $this->getSha1();
		if ( $sha1 && ( $sha1 !== $sha1File ) ) {
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		// Resolve the uploader without name validation so that names that are
		// not valid local user names (e.g. IP addresses) are still attributed,
		// and never hand a non-object to the upload methods.
		$user = $this->getUserObj() ?: User::newFromName( $this->getUser(), false );
		if ( !$user ) {
			wfDebug( __METHOD__ . ': Could not resolve uploader "' . $this->getUser() . "\"\n" );
			return false;
		}

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user );
		} else {
			$flags = 0;
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Successful\n" );
			return true;
		}

		wfDebug( __METHOD__ . ': failed: ' . $status->getHTML() . "\n" );
		return false;
	}

	/**
	 * Download the remote source into a new temporary file.
	 *
	 * Does nothing unless the 'EnableUploads' configuration setting is on.
	 *
	 * @since 1.12.2
	 * @return bool|string Path of the temporary file, or false on failure
	 */
	public function downloadSource() {
		if ( !$this->config->get( 'EnableUploads' ) ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		if ( $tempo === false ) {
			wfDebug( "IMPORT: couldn't create a temp file\n" );
			return false;
		}

		$f = fopen( $tempo, 'wb' );
		if ( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src, [], __METHOD__ );
		if ( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );

		// A failed or short write would leave a truncated file behind; do not
		// hand it to the caller as if it were the complete download.
		if ( $written !== strlen( $data ) ) {
			wfDebug( "IMPORT: couldn't write all data to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}