]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - maintenance/convertLinks.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / maintenance / convertLinks.php
1 <?php
2 /**
3  * Convert from the old links schema (string->ID) to the new schema (ID->ID).
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  * http://www.gnu.org/copyleft/gpl.html
19  *
20  * @file
21  * @ingroup Maintenance
22  */
23
24 require_once __DIR__ . '/Maintenance.php';
25
26 /**
27  * Maintenance script to convert from the old links schema (string->ID)
28  * to the new schema (ID->ID).
29  *
30  * The wiki should be put into read-only mode while this script executes.
31  *
32  * @ingroup Maintenance
33  */
class ConvertLinks extends Maintenance {
        /** @var bool Whether timing information is written to the performance log file */
        private $logPerformance;

        /**
         * Declare the script description and its command-line switches.
         */
        public function __construct() {
                parent::__construct();
                $this->addDescription(
                        'Convert from the old links schema (string->ID) to the new schema (ID->ID). '
                                . 'The wiki should be put into read-only mode while this script executes' );

                // All of these are "--name" switches that execute() reads through
                // hasOption()/getOption(), so they are registered as options rather
                // than as positional arguments.
                $this->addOption( 'logperformance', "Log performance to perfLogFilename." );
                $this->addOption(
                        'perfLogFilename',
                        "Filename where performance is logged if --logperformance was set "
                                . "(defaults to 'convLinksPerf.txt').",
                        false,
                        true // this switch carries a value (the file name)
                );
                $this->addOption(
                        'keep-links-table',
                        "Don't overwrite the old links table with the new one, leave the new table at links_temp."
                );
                $this->addOption(
                        'nokeys',
                        /* (What about InnoDB?) */
                        "Don't create keys, and so allow duplicates in the new links table.\n"
                                . "This gives a huge speed improvement for very large links tables which are MyISAM."
                );
        }

        /**
         * @return int The database access level this script asks for (administrative)
         */
        public function getDbType() {
                return Maintenance::DB_ADMIN;
        }

        /**
         * Run the conversion.
         *
         * Bails out early when the backend is not MySQL, when a pagelinks table
         * already exists, or when links.l_from already reports an integer type.
         * Otherwise the converted rows are written to links_temp and, unless
         * --keep-links-table was given, links_temp is swapped in for links while
         * the old table is kept as links_backup.
         */
        public function execute() {
                $dbw = $this->getDB( DB_MASTER );

                $type = $dbw->getType();
                if ( $type !== 'mysql' ) {
                        $this->output( "Link table conversion not necessary for $type\n" );

                        return;
                }

                $overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
                $this->logPerformance = $this->hasOption( 'logperformance' );
                $perfLogFilename = $this->getOption( 'perfLogFilename', 'convLinksPerf.txt' );

                list( $links, $links_temp, $links_backup ) =
                        $dbw->tableNamesN( 'links', 'links_temp', 'links_backup' );

                if ( $dbw->tableExists( 'pagelinks' ) ) {
                        $this->output( "...have pagelinks; skipping old links table updates\n" );

                        return;
                }

                $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1", __METHOD__ );
                $alreadyConverted = ( $dbw->fieldType( $res, 0 ) == "int" );
                $dbw->freeResult( $res );
                if ( $alreadyConverted ) {
                        $this->output( "Schema already converted\n" );

                        return;
                }

                $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links", __METHOD__ );
                $row = $dbw->fetchObject( $res );
                $numRows = (int)$row->count;
                $dbw->freeResult( $res );

                if ( $numRows === 0 ) {
                        $this->output( "Updating schema (no rows to convert)...\n" );
                        $this->createTempTable();
                } else {
                        $this->convertAllRows( $dbw, $numRows, $perfLogFilename );
                }

                if ( $overwriteLinksTable ) {
                        # Check for existing links_backup, and delete it if it exists.
                        $this->output( "Dropping backup links table if it exists..." );
                        $dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
                        $this->output( " done.\n" );

                        # Swap in the new table, and move old links table to links_backup.
                        # Every name here comes from tableNamesN() so that a configured
                        # table prefix is honoured for the source table as well.
                        $this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
                        $dbw->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __METHOD__ );
                        $this->output( " done.\n\n" );

                        $this->output( "Conversion complete. The old table remains at $links_backup;\n" );
                        $this->output( "delete at your leisure.\n" );
                } else {
                        $this->output( "Conversion complete.  The converted table is at $links_temp;\n" );
                        $this->output( "the original links table is unchanged.\n" );
                }
        }

        /**
         * Whether the temporary table should be created without keys.
         *
         * The switch is documented as --nokeys, but this script historically tested
         * for the camel-case spelling, so both are honoured.
         * NOTE(review): assumes the Maintenance base class lets the unregistered
         * spelling through, as it must have for the old check to ever match - confirm.
         *
         * @return bool
         */
        private function skipKeys() {
                return $this->hasOption( 'nokeys' ) || $this->hasOption( 'noKeys' );
        }

        /**
         * Convert a non-empty links table into links_temp, optionally logging timings.
         *
         * @param object $dbw Connection obtained from getDB( DB_MASTER )
         * @param int $numRows Number of rows counted in the links table
         * @param string $perfLogFilename File that receives the timings when logging is on
         */
        private function convertAllRows( $dbw, $numRows, $perfLogFilename ) {
                $fh = false;
                if ( $this->logPerformance ) {
                        $fh = fopen( $perfLogFilename, "w" );
                        if ( !$fh ) {
                                // Carry on without the log rather than aborting the conversion.
                                $this->error( "Couldn't open $perfLogFilename" );
                                $this->logPerformance = false;
                        }
                }
                $startTime = microtime( true );

                $ids = $this->loadCurIds( $dbw, $numRows, $fh );

                # Now, step through the links table in chunks, convert, and write
                # to the new table.
                $this->createTempTable();
                $this->copyLinks( $dbw, $ids, $numRows, $fh );

                $this->performanceLog(
                        $fh,
                        "Total execution time: " . ( microtime( true ) - $startTime ) . " seconds.\n"
                );
                if ( $this->logPerformance ) {
                        fclose( $fh );
                }
        }

        /**
         * Read the whole cur table and build a "prefixed title => cur_id" map.
         *
         * @param object $dbw Connection obtained from getDB( DB_MASTER )
         * @param int $numRows Row count of the links table (only echoed into the log)
         * @param resource|bool $fh Performance log handle, or false when not logging
         * @return array Map of title text (namespace name prepended when non-zero) to cur_id
         */
        private function loadCurIds( $dbw, $numRows, $fh ) {
                global $wgContLang;

                # number of rows between progress reports
                $reportInterval = 1000;

                $cur = $dbw->tableName( 'cur' );
                $baseTime = microtime( true );

                $this->output( "Loading IDs from $cur table...\n" );
                $this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
                $this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );

                // Result buffering is switched off for this one scan and restored afterwards.
                $dbw->bufferResults( false );
                $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur", __METHOD__ );
                $ids = [];
                $rowsRead = 0;

                foreach ( $res as $row ) {
                        $title = $row->cur_title;
                        if ( $row->cur_namespace ) {
                                $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
                        }
                        $ids[$title] = $row->cur_id;
                        $rowsRead++;
                        if ( $rowsRead % $reportInterval === 0 ) {
                                $this->performanceLog(
                                        $fh,
                                        $rowsRead . " " . ( microtime( true ) - $baseTime ) . "\n"
                                );
                                $this->output( "\t$rowsRead rows of $cur table read.\n" );
                        }
                }
                $dbw->freeResult( $res );
                $dbw->bufferResults( true );

                $this->output( "Finished loading IDs.\n\n" );
                $this->performanceLog(
                        $fh,
                        "Took " . ( microtime( true ) - $baseTime ) . " seconds to load IDs.\n\n"
                );

                return $ids;
        }

        /**
         * Copy the links table into links_temp in batches, replacing each source
         * title with its page ID. Rows whose source title is not in $ids are
         * counted as bad links and skipped.
         *
         * @param object $dbw Connection obtained from getDB( DB_MASTER )
         * @param array $ids Map of prefixed title to cur_id
         * @param int $numRows Number of rows counted in the links table
         * @param resource|bool $fh Performance log handle, or false when not logging
         */
        private function copyLinks( $dbw, array $ids, $numRows, $fh ) {
                # number of rows per INSERT
                $batchSize = 1000;

                list( $links, $links_temp ) = $dbw->tableNamesN( 'links', 'links_temp' );

                // Without keys there is no unique index to collide with, so a plain
                // INSERT is used; otherwise duplicates are dropped by INSERT IGNORE.
                $verb = $this->skipKeys() ? 'INSERT' : 'INSERT IGNORE';
                $insertPrefix = "$verb INTO $links_temp (l_from,l_to) VALUES ";

                $numBadLinks = 0;
                $totalTuplesInserted = 0;

                $this->performanceLog( $fh, "Resetting timer.\n\n" );
                $baseTime = microtime( true );
                $this->output( "Processing $numRows rows from $links table...\n" );
                $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
                $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );

                for ( $rowOffset = 0; $rowOffset < $numRows; $rowOffset += $batchSize ) {
                        $sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $batchSize, $rowOffset );
                        $res = $dbw->query( $sqlRead, __METHOD__ );

                        $tuples = [];
                        foreach ( $res as $row ) {
                                $fromTitle = $row->l_from;
                                if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
                                        $from = $ids[$fromTitle];
                                        $to = $row->l_to;
                                        $tuples[] = "($from,$to)";
                                } else { # invalid title
                                        $numBadLinks++;
                                }
                        }
                        $dbw->freeResult( $res );

                        if ( !$tuples ) {
                                // Nothing in this batch resolved to a page ID.
                                continue;
                        }

                        $tuplesAdded = count( $tuples );
                        $this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
                        $dbw->query( $insertPrefix . implode( ",", $tuples ), __METHOD__ );
                        $totalTuplesInserted += $tuplesAdded;
                        $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
                        $this->performanceLog(
                                $fh,
                                $totalTuplesInserted . " " . ( microtime( true ) - $baseTime ) . "\n"
                        );
                }

                $this->output( "$totalTuplesInserted valid titles and "
                        . "$numBadLinks invalid titles were processed.\n\n" );
                $this->performanceLog(
                        $fh,
                        "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
                );
        }

        /**
         * Drop any existing links_temp table and create a fresh, empty one, with
         * or without keys depending on the --nokeys switch.
         */
        private function createTempTable() {
                $dbConn = $this->getDB( DB_MASTER );

                if ( !$dbConn->isOpen() ) {
                        $this->output( "Opening connection to database failed.\n" );

                        return;
                }
                $links_temp = $dbConn->tableName( 'links_temp' );

                $this->output( "Dropping temporary links table if it exists..." );
                $dbConn->query( "DROP TABLE IF EXISTS $links_temp", __METHOD__ );
                $this->output( " done.\n" );

                $this->output( "Creating temporary links table..." );
                $definition = "l_from int(8) unsigned NOT NULL default '0', " .
                        "l_to int(8) unsigned NOT NULL default '0'";
                if ( !$this->skipKeys() ) {
                        $definition .= ", " .
                                "UNIQUE KEY l_from(l_from,l_to), " .
                                "KEY (l_to)";
                }
                $dbConn->query( "CREATE TABLE $links_temp ( " . $definition . ")", __METHOD__ );
                $this->output( " done.\n\n" );
        }

        /**
         * Append text to the performance log; does nothing when logging is off.
         *
         * @param resource|bool $fh Open log handle (ignored when logging is off)
         * @param string $text Text to write verbatim
         */
        private function performanceLog( $fh, $text ) {
                if ( $this->logPerformance ) {
                        fwrite( $fh, $text );
                }
        }
}
304
// Name the class that implements this script, then hand control to the
// maintenance bootstrap. RUN_MAINTENANCE_IF_MAIN is not defined in this file;
// presumably it comes from the Maintenance.php required at the top.
$maintClass = 'ConvertLinks';
require_once RUN_MAINTENANCE_IF_MAIN;