]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - maintenance/namespaceDupes.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3  * Check for articles to fix after adding/deleting namespaces
4  *
5  * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6  * https://www.mediawiki.org/
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  * http://www.gnu.org/copyleft/gpl.html
22  *
23  * @file
24  * @ingroup Maintenance
25  */
26
27 require_once __DIR__ . '/Maintenance.php';
28
29 use MediaWiki\Linker\LinkTarget;
30 use MediaWiki\MediaWikiServices;
31 use Wikimedia\Rdbms\ResultWrapper;
32 use Wikimedia\Rdbms\IMaintainableDatabase;
33
34 /**
35  * Maintenance script that checks for articles to fix after
36  * adding/deleting namespaces.
37  *
38  * @ingroup Maintenance
39  */
40 class NamespaceConflictChecker extends Maintenance {
41
42         /**
43          * @var IMaintainableDatabase
44          */
45         protected $db;
46
47         private $resolvablePages = 0;
48         private $totalPages = 0;
49
50         private $resolvableLinks = 0;
51         private $totalLinks = 0;
52
53         public function __construct() {
54                 parent::__construct();
55                 $this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
56                 $this->addOption( 'fix', 'Attempt to automatically fix errors' );
57                 $this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
58                         "the correct title" );
59                 $this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
60                         "<text> appended after the article name", false, true );
61                 $this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
62                         "<text> prepended before the article name", false, true );
63                 $this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
64                         "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
65                         "the colon will be replaced with a hyphen.",
66                         false, true );
67                 $this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
68                         "specify the namespace ID of the destination.", false, true );
69                 $this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
70                         "begin with a conflicting prefix will be renamed, for example " .
71                         "Talk:File:Foo -> File_Talk:Foo" );
72         }
73
74         public function execute() {
75                 $this->db = $this->getDB( DB_MASTER );
76
77                 $options = [
78                         'fix' => $this->hasOption( 'fix' ),
79                         'merge' => $this->hasOption( 'merge' ),
80                         'add-suffix' => $this->getOption( 'add-suffix', '' ),
81                         'add-prefix' => $this->getOption( 'add-prefix', '' ),
82                         'move-talk' => $this->hasOption( 'move-talk' ),
83                         'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
84                         'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ) ];
85
86                 if ( $options['source-pseudo-namespace'] !== '' ) {
87                         $retval = $this->checkPrefix( $options );
88                 } else {
89                         $retval = $this->checkAll( $options );
90                 }
91
92                 if ( $retval ) {
93                         $this->output( "\nLooks good!\n" );
94                 } else {
95                         $this->output( "\nOh noeees\n" );
96                 }
97         }
98
99         /**
100          * Check all namespaces
101          *
102          * @param array $options Associative array of validated command-line options
103          *
104          * @return bool
105          */
106         private function checkAll( $options ) {
107                 global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;
108
109                 $spaces = [];
110
111                 // List interwikis first, so they'll be overridden
112                 // by any conflicting local namespaces.
113                 foreach ( $this->getInterwikiList() as $prefix ) {
114                         $name = $wgContLang->ucfirst( $prefix );
115                         $spaces[$name] = 0;
116                 }
117
118                 // Now pull in all canonical and alias namespaces...
119                 foreach ( MWNamespace::getCanonicalNamespaces() as $ns => $name ) {
120                         // This includes $wgExtraNamespaces
121                         if ( $name !== '' ) {
122                                 $spaces[$name] = $ns;
123                         }
124                 }
125                 foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
126                         if ( $name !== '' ) {
127                                 $spaces[$name] = $ns;
128                         }
129                 }
130                 foreach ( $wgNamespaceAliases as $name => $ns ) {
131                         $spaces[$name] = $ns;
132                 }
133                 foreach ( $wgContLang->getNamespaceAliases() as $name => $ns ) {
134                         $spaces[$name] = $ns;
135                 }
136
137                 // We'll need to check for lowercase keys as well,
138                 // since we're doing case-sensitive searches in the db.
139                 foreach ( $spaces as $name => $ns ) {
140                         $moreNames = [];
141                         $moreNames[] = $wgContLang->uc( $name );
142                         $moreNames[] = $wgContLang->ucfirst( $wgContLang->lc( $name ) );
143                         $moreNames[] = $wgContLang->ucwords( $name );
144                         $moreNames[] = $wgContLang->ucwords( $wgContLang->lc( $name ) );
145                         $moreNames[] = $wgContLang->ucwordbreaks( $name );
146                         $moreNames[] = $wgContLang->ucwordbreaks( $wgContLang->lc( $name ) );
147                         if ( !$wgCapitalLinks ) {
148                                 foreach ( $moreNames as $altName ) {
149                                         $moreNames[] = $wgContLang->lcfirst( $altName );
150                                 }
151                                 $moreNames[] = $wgContLang->lcfirst( $name );
152                         }
153                         foreach ( array_unique( $moreNames ) as $altName ) {
154                                 if ( $altName !== $name ) {
155                                         $spaces[$altName] = $ns;
156                                 }
157                         }
158                 }
159
160                 // Sort by namespace index, and if there are two with the same index,
161                 // break the tie by sorting by name
162                 $origSpaces = $spaces;
163                 uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
164                         if ( $origSpaces[$a] < $origSpaces[$b] ) {
165                                 return -1;
166                         } elseif ( $origSpaces[$a] > $origSpaces[$b] ) {
167                                 return 1;
168                         } elseif ( $a < $b ) {
169                                 return -1;
170                         } elseif ( $a > $b ) {
171                                 return 1;
172                         } else {
173                                 return 0;
174                         }
175                 } );
176
177                 $ok = true;
178                 foreach ( $spaces as $name => $ns ) {
179                         $ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
180                 }
181
182                 $this->output( "{$this->totalPages} pages to fix, " .
183                         "{$this->resolvablePages} were resolvable.\n\n" );
184
185                 foreach ( $spaces as $name => $ns ) {
186                         if ( $ns != 0 ) {
187                                 /* Fix up link destinations for non-interwiki links only.
188                                  *
189                                  * For example if a page has [[Foo:Bar]] and then a Foo namespace
190                                  * is introduced, pagelinks needs to be updated to have
191                                  * page_namespace = NS_FOO.
192                                  *
193                                  * If instead an interwiki prefix was introduced called "Foo",
194                                  * the link should instead be moved to the iwlinks table. If a new
195                                  * language is introduced called "Foo", or if there is a pagelink
196                                  * [[fr:Bar]] when interlanguage magic links are turned on, the
197                                  * link would have to be moved to the langlinks table. Let's put
198                                  * those cases in the too-hard basket for now. The consequences are
199                                  * not especially severe.
200                                  * @fixme Handle interwiki links, and pagelinks to Category:, File:
201                                  * which probably need reparsing.
202                                  */
203
204                                 $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
205                                 $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );
206
207                                 // The redirect table has interwiki links randomly mixed in, we
208                                 // need to filter those out. For example [[w:Foo:Bar]] would
209                                 // have rd_interwiki=w and rd_namespace=0, which would match the
210                                 // query for a conflicting namespace "Foo" if filtering wasn't done.
211                                 $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
212                                         [ 'rd_interwiki' => null ] );
213                                 $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
214                                         [ 'rd_interwiki' => '' ] );
215                         }
216                 }
217
218                 $this->output( "{$this->totalLinks} links to fix, " .
219                         "{$this->resolvableLinks} were resolvable.\n" );
220
221                 return $ok;
222         }
223
224         /**
225          * Get the interwiki list
226          *
227          * @return array
228          */
229         private function getInterwikiList() {
230                 $result = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();
231                 $prefixes = [];
232                 foreach ( $result as $row ) {
233                         $prefixes[] = $row['iw_prefix'];
234                 }
235
236                 return $prefixes;
237         }
238
239         /**
240          * Check a given prefix and try to move it into the given destination namespace
241          *
242          * @param int $ns Destination namespace id
243          * @param string $name
244          * @param array $options Associative array of validated command-line options
245          * @return bool
246          */
247         private function checkNamespace( $ns, $name, $options ) {
248                 $targets = $this->getTargetList( $ns, $name, $options );
249                 $count = $targets->numRows();
250                 $this->totalPages += $count;
251                 if ( $count == 0 ) {
252                         return true;
253                 }
254
255                 $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
256
257                 $ok = true;
258                 foreach ( $targets as $row ) {
259                         // Find the new title and determine the action to take
260
261                         $newTitle = $this->getDestinationTitle( $ns, $name,
262                                 $row->page_namespace, $row->page_title, $options );
263                         $logStatus = false;
264                         if ( !$newTitle ) {
265                                 $logStatus = 'invalid title';
266                                 $action = 'abort';
267                         } elseif ( $newTitle->exists() ) {
268                                 if ( $options['merge'] ) {
269                                         if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
270                                                 $action = 'merge';
271                                         } else {
272                                                 $action = 'abort';
273                                         }
274                                 } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
275                                         $action = 'abort';
276                                         $logStatus = 'dest title exists and --add-prefix not specified';
277                                 } else {
278                                         $newTitle = $this->getAlternateTitle( $newTitle, $options );
279                                         if ( !$newTitle ) {
280                                                 $action = 'abort';
281                                                 $logStatus = 'alternate title is invalid';
282                                         } elseif ( $newTitle->exists() ) {
283                                                 $action = 'abort';
284                                                 $logStatus = 'title conflict';
285                                         } else {
286                                                 $action = 'move';
287                                                 $logStatus = 'alternate';
288                                         }
289                                 }
290                         } else {
291                                 $action = 'move';
292                                 $logStatus = 'no conflict';
293                         }
294
295                         // Take the action or log a dry run message
296
297                         $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
298                         $pageOK = true;
299
300                         switch ( $action ) {
301                                 case 'abort':
302                                         $this->output( "$logTitle *** $logStatus\n" );
303                                         $pageOK = false;
304                                         break;
305                                 case 'move':
306                                         $this->output( "$logTitle -> " .
307                                                 $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
308
309                                         if ( $options['fix'] ) {
310                                                 $pageOK = $this->movePage( $row->page_id, $newTitle );
311                                         }
312                                         break;
313                                 case 'merge':
314                                         $this->output( "$logTitle => " .
315                                                 $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
316
317                                         if ( $options['fix'] ) {
318                                                 $pageOK = $this->mergePage( $row, $newTitle );
319                                         }
320                                         break;
321                         }
322
323                         if ( $pageOK ) {
324                                 $this->resolvablePages++;
325                         } else {
326                                 $ok = false;
327                         }
328                 }
329
330                 return $ok;
331         }
332
333         /**
334          * Check and repair the destination fields in a link table
335          * @param string $table The link table name
336          * @param string $fieldPrefix The field prefix in the link table
337          * @param int $ns Destination namespace id
338          * @param string $name
339          * @param array $options Associative array of validated command-line options
340          * @param array $extraConds Extra conditions for the SQL query
341          */
342         private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
343                 $extraConds = []
344         ) {
345                 $batchConds = [];
346                 $fromField = "{$fieldPrefix}_from";
347                 $namespaceField = "{$fieldPrefix}_namespace";
348                 $titleField = "{$fieldPrefix}_title";
349                 $batchSize = 500;
350                 while ( true ) {
351                         $res = $this->db->select(
352                                 $table,
353                                 [ $fromField, $namespaceField, $titleField ],
354                                 array_merge( $batchConds, $extraConds, [
355                                         $namespaceField => 0,
356                                         $titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
357                                 ] ),
358                                 __METHOD__,
359                                 [
360                                         'ORDER BY' => [ $titleField, $fromField ],
361                                         'LIMIT' => $batchSize
362                                 ]
363                         );
364
365                         if ( $res->numRows() == 0 ) {
366                                 break;
367                         }
368                         foreach ( $res as $row ) {
369                                 $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
370                                         "dbk={$row->$titleField}";
371                                 $destTitle = $this->getDestinationTitle( $ns, $name,
372                                         $row->$namespaceField, $row->$titleField, $options );
373                                 $this->totalLinks++;
374                                 if ( !$destTitle ) {
375                                         $this->output( "$table $logTitle *** INVALID\n" );
376                                         continue;
377                                 }
378                                 $this->resolvableLinks++;
379                                 if ( !$options['fix'] ) {
380                                         $this->output( "$table $logTitle -> " .
381                                                 $destTitle->getPrefixedDBkey() . " DRY RUN\n" );
382                                         continue;
383                                 }
384
385                                 $this->db->update( $table,
386                                         // SET
387                                         [
388                                                 $namespaceField => $destTitle->getNamespace(),
389                                                 $titleField => $destTitle->getDBkey()
390                                         ],
391                                         // WHERE
392                                         [
393                                                 $namespaceField => 0,
394                                                 $titleField => $row->$titleField,
395                                                 $fromField => $row->$fromField
396                                         ],
397                                         __METHOD__,
398                                         [ 'IGNORE' ]
399                                 );
400                                 $this->output( "$table $logTitle -> " .
401                                         $destTitle->getPrefixedDBkey() . "\n" );
402                         }
403                         $encLastTitle = $this->db->addQuotes( $row->$titleField );
404                         $encLastFrom = $this->db->addQuotes( $row->$fromField );
405
406                         $batchConds = [
407                                 "$titleField > $encLastTitle " .
408                                 "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];
409
410                         wfWaitForSlaves();
411                 }
412         }
413
414         /**
415          * Move the given pseudo-namespace, either replacing the colon with a hyphen
416          * (useful for pseudo-namespaces that conflict with interwiki links) or move
417          * them to another namespace if specified.
418          * @param array $options Associative array of validated command-line options
419          * @return bool
420          */
421         private function checkPrefix( $options ) {
422                 $prefix = $options['source-pseudo-namespace'];
423                 $ns = $options['dest-namespace'];
424                 $this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );
425
426                 return $this->checkNamespace( $ns, $prefix, $options );
427         }
428
429         /**
430          * Find pages in main and talk namespaces that have a prefix of the new
431          * namespace so we know titles that will need migrating
432          *
433          * @param int $ns Destination namespace id
434          * @param string $name Prefix that is being made a namespace
435          * @param array $options Associative array of validated command-line options
436          *
437          * @return ResultWrapper
438          */
439         private function getTargetList( $ns, $name, $options ) {
440                 if ( $options['move-talk'] && MWNamespace::isSubject( $ns ) ) {
441                         $checkNamespaces = [ NS_MAIN, NS_TALK ];
442                 } else {
443                         $checkNamespaces = NS_MAIN;
444                 }
445
446                 return $this->db->select( 'page',
447                         [
448                                 'page_id',
449                                 'page_title',
450                                 'page_namespace',
451                         ],
452                         [
453                                 'page_namespace' => $checkNamespaces,
454                                 'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
455                         ],
456                         __METHOD__
457                 );
458         }
459
460         /**
461          * Get the preferred destination title for a given target page.
462          * @param int $ns The destination namespace ID
463          * @param string $name The conflicting prefix
464          * @param int $sourceNs The source namespace
465          * @param int $sourceDbk The source DB key (i.e. page_title)
466          * @param array $options Associative array of validated command-line options
467          * @return Title|false
468          */
469         private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
470                 $dbk = substr( $sourceDbk, strlen( "$name:" ) );
471                 if ( $ns == 0 ) {
472                         // An interwiki; try an alternate encoding with '-' for ':'
473                         $dbk = "$name-" . $dbk;
474                 }
475                 $destNS = $ns;
476                 if ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) ) {
477                         // This is an associated talk page moved with the --move-talk feature.
478                         $destNS = MWNamespace::getTalk( $destNS );
479                 }
480                 $newTitle = Title::makeTitleSafe( $destNS, $dbk );
481                 if ( !$newTitle || !$newTitle->canExist() ) {
482                         return false;
483                 }
484                 return $newTitle;
485         }
486
487         /**
488          * Get an alternative title to move a page to. This is used if the
489          * preferred destination title already exists.
490          *
491          * @param LinkTarget $linkTarget
492          * @param array $options Associative array of validated command-line options
493          * @return Title|bool
494          */
495         private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
496                 $prefix = $options['add-prefix'];
497                 $suffix = $options['add-suffix'];
498                 if ( $prefix == '' && $suffix == '' ) {
499                         return false;
500                 }
501                 while ( true ) {
502                         $dbk = $prefix . $linkTarget->getDBkey() . $suffix;
503                         $title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );
504                         if ( !$title ) {
505                                 return false;
506                         }
507                         if ( !$title->exists() ) {
508                                 return $title;
509                         }
510                 }
511         }
512
513         /**
514          * Move a page
515          *
516          * @param integer $id The page_id
517          * @param LinkTarget $newLinkTarget The new title link target
518          * @return bool
519          */
520         private function movePage( $id, LinkTarget $newLinkTarget ) {
521                 $this->db->update( 'page',
522                         [
523                                 "page_namespace" => $newLinkTarget->getNamespace(),
524                                 "page_title" => $newLinkTarget->getDBkey(),
525                         ],
526                         [
527                                 "page_id" => $id,
528                         ],
529                         __METHOD__ );
530
531                 // Update *_from_namespace in links tables
532                 $fromNamespaceTables = [
533                         [ 'pagelinks', 'pl' ],
534                         [ 'templatelinks', 'tl' ],
535                         [ 'imagelinks', 'il' ] ];
536                 foreach ( $fromNamespaceTables as $tableInfo ) {
537                         list( $table, $fieldPrefix ) = $tableInfo;
538                         $this->db->update( $table,
539                                 // SET
540                                 [ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ],
541                                 // WHERE
542                                 [ "{$fieldPrefix}_from" => $id ],
543                                 __METHOD__ );
544                 }
545
546                 return true;
547         }
548
549         /**
550          * Determine if we can merge a page.
551          * We check if an inaccessible revision would become the latest and
552          * deny the merge if so -- it's theoretically possible to update the
553          * latest revision, but opens a can of worms -- search engine updates,
554          * recentchanges review, etc.
555          *
556          * @param integer $id The page_id
557          * @param LinkTarget $linkTarget The new link target
558          * @param string $logStatus This is set to the log status message on failure
559          * @return bool
560          */
561         private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
562                 $latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
563                 $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );
564                 if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
565                         $logStatus = 'cannot merge since source is later';
566                         return false;
567                 } else {
568                         return true;
569                 }
570         }
571
572         /**
573          * Merge page histories
574          *
575          * @param stdClass $row Page row
576          * @param Title $newTitle The new title
577          * @return bool
578          */
579         private function mergePage( $row, Title $newTitle ) {
580                 $id = $row->page_id;
581
582                 // Construct the WikiPage object we will need later, while the
583                 // page_id still exists. Note that this cannot use makeTitleSafe(),
584                 // we are deliberately constructing an invalid title.
585                 $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
586                 $sourceTitle->resetArticleID( $id );
587                 $wikiPage = new WikiPage( $sourceTitle );
588                 $wikiPage->loadPageData( 'fromdbmaster' );
589
590                 $destId = $newTitle->getArticleID();
591                 $this->beginTransaction( $this->db, __METHOD__ );
592                 $this->db->update( 'revision',
593                         // SET
594                         [ 'rev_page' => $destId ],
595                         // WHERE
596                         [ 'rev_page' => $id ],
597                         __METHOD__ );
598
599                 $this->db->delete( 'page', [ 'page_id' => $id ], __METHOD__ );
600
601                 $this->commitTransaction( $this->db, __METHOD__ );
602
603                 /* Call LinksDeletionUpdate to delete outgoing links from the old title,
604                  * and update category counts.
605                  *
606                  * Calling external code with a fake broken Title is a fairly dubious
607                  * idea. It's necessary because it's quite a lot of code to duplicate,
608                  * but that also makes it fragile since it would be easy for someone to
609                  * accidentally introduce an assumption of title validity to the code we
610                  * are calling.
611                  */
612                 DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
613                 DeferredUpdates::doUpdates();
614
615                 return true;
616         }
617 }
618
// Name of the maintenance class defined in this file.
$maintClass = 'NamespaceConflictChecker';
// RUN_MAINTENANCE_IF_MAIN is a constant naming the file to include next.
require_once RUN_MAINTENANCE_IF_MAIN;