]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - maintenance/refreshLinks.inc
MediaWiki 1.15.0
[autoinstalls/mediawiki.git] / maintenance / refreshLinks.inc
1 <?php
2 /**
3  * @todo document
4  * @file
5  * @ingroup Maintenance
6  */
7
/**
 * Refresh link (or redirect) table entries for a range of pages.
 *
 * Exactly one of three strategies runs, chosen in this order:
 *  - $oldRedirectsOnly: fix pages flagged page_is_redirect=1 that have no
 *    matching row in the redirect table;
 *  - $newOnly: process pages with page_is_new=1 and page_id >= $start
 *    ($end is not applied in this mode);
 *  - otherwise: walk every page_id from $start to $end inclusive.
 *
 * Progress is printed, and wfWaitForSlaves( $maxLag ) is called, once per
 * reporting interval (100 pages).
 *
 * @param $start Integer: first page_id to process
 * @param $newOnly Boolean: only process pages flagged as new
 * @param $maxLag Mixed: value handed to wfWaitForSlaves()
 * @param $end Integer: last page_id to process; 0 means "no upper bound"
 *        (the full walk then stops at the highest page_id in the page table)
 * @param $redirectsOnly Boolean: call fixRedirect() instead of fixLinksFromArticle()
 * @param $oldRedirectsOnly Boolean: only fix redirects missing from the redirect table
 */
function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
        global $wgUser, $wgParser, $wgUseTidy;

        $reportingInterval = 100;
        $fname = 'refreshLinks';
        $dbr = wfGetDB( DB_SLAVE );

        # Both bounds are interpolated into raw SQL below, so force them to integers
        $start = intval( $start );
        $end = intval( $end );

        # Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
        $wgUser->setOption('math', MW_MATH_SOURCE);

        # Don't generate extension images (e.g. Timeline)
        if( method_exists( $wgParser, "clearTagHooks" ) ) {
                $wgParser->clearTagHooks();
        }

        # Don't use HTML tidy
        $wgUseTidy = false;

        $what = $redirectsOnly ? "redirects" : "links";

        if( $oldRedirectsOnly ) {
                # This entire code path is cut-and-pasted from below.  Hurrah.
                $res = $dbr->query(
                        "SELECT page_id ".
                        "FROM page ".
                        "LEFT JOIN redirect ON page_id=rd_from ".
                        "WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
                        ($end == 0 ? "page_id >= $start"
                                   : "page_id BETWEEN $start AND $end"),
                        $fname
                );
                $num = $dbr->numRows( $res );
                print "Refreshing $num old redirects from $start...\n";

                # The counter must exist before the first ++$i, otherwise PHP
                # raises an undefined-variable notice on the first row
                $i = 0;
                while( $row = $dbr->fetchObject( $res ) ) {
                        if ( !( ++$i % $reportingInterval ) ) {
                                print "$i\n";
                                wfWaitForSlaves( $maxLag );
                        }
                        fixRedirect( $row->page_id );
                }
        } elseif( $newOnly ) {
                print "Refreshing $what from ";
                $res = $dbr->select( 'page',
                        array( 'page_id' ),
                        array(
                                'page_is_new' => 1,
                                "page_id >= $start" ),
                        $fname
                );
                $num = $dbr->numRows( $res );
                print "$num new articles...\n";

                $i = 0;
                while ( $row = $dbr->fetchObject( $res ) ) {
                        if ( !( ++$i % $reportingInterval ) ) {
                                print "$i\n";
                                wfWaitForSlaves( $maxLag );
                        }
                        if ( $redirectsOnly ) {
                                fixRedirect( $row->page_id );
                        } else {
                                fixLinksFromArticle( $row->page_id );
                        }
                }
        } else {
                print "Refreshing $what table.\n";
                if ( !$end ) {
                        # No explicit upper bound: run up to the highest existing page_id
                        $end = $dbr->selectField( 'page', 'max(page_id)', false );
                }
                print("Starting from page_id $start of $end.\n");

                for ( $id = $start; $id <= $end; $id++ ) {
                        if ( !( $id % $reportingInterval ) ) {
                                print "$id\n";
                                wfWaitForSlaves( $maxLag );
                        }
                        if ( $redirectsOnly ) {
                                fixRedirect( $id );
                        } else {
                                fixLinksFromArticle( $id );
                        }
                }
        }
}
93
/**
 * Rebuild the redirect table entry for a single page.
 *
 * Sets the globals $wgTitle and $wgArticle to the page being processed,
 * asks the article for its redirect target via followRedirect() and, when
 * the result is an object, hands it to updateRedirectOn() together with
 * the master connection. Returns silently when no title exists for the ID
 * or when followRedirect() yields a non-object value.
 *
 * @param $id Integer: page_id of the page to examine
 */
function fixRedirect( $id ){
        global $wgTitle, $wgArticle;

        $wgTitle = Title::newFromID( $id );
        $master = wfGetDB( DB_MASTER );

        if ( $wgTitle === null ) {
                return;
        }

        $wgArticle = new Article( $wgTitle );
        $target = $wgArticle->followRedirect();

        # Only a truthy object result is written back; anything else is skipped
        if ( $target && is_object( $target ) ) {
                $wgArticle->updateRedirectOn( $master, $target );
        }
}
112
/**
 * Re-parse the current revision of one page and rewrite its link tables.
 *
 * Sets the global $wgTitle to the page being processed, clears the
 * LinkCache singleton, parses the revision text with default ParserOptions
 * and runs a LinksUpdate inside a transaction on the master connection.
 * Returns without touching the master when no title exists for the ID or
 * the title has no revision.
 *
 * @param $id Integer: page_id of the page to refresh
 */
function fixLinksFromArticle( $id ) {
        global $wgTitle, $wgParser;

        $wgTitle = Title::newFromID( $id );

        $linkCache =& LinkCache::singleton();
        $linkCache->clear();

        if ( is_null( $wgTitle ) ) {
                return;
        }

        $revision = Revision::newFromTitle( $wgTitle );
        if ( !$revision ) {
                return;
        }

        # Open the transaction only once there is definitely work to do, so
        # that neither early return above can leave it dangling on the master
        $dbw = wfGetDB( DB_MASTER );
        $dbw->begin();

        $options = new ParserOptions;
        $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
        $update = new LinksUpdate( $wgTitle, $parserOutput, false );
        $update->doUpdate();
        $dbw->immediateCommit();
}
138
/**
 * Removes non-existing links from pages from pagelinks, imagelinks,
 * categorylinks, templatelinks and externallinks tables.
 *
 * For each table, rows whose "from" page ID has no matching page row are
 * read from a slave over a separate, unbuffered connection and deleted on
 * the master in batches.
 *
 * @param $maxLag Mixed: lag threshold passed to wfWaitForSlaves(), both
 *        before starting and between batches; when empty, the wait between
 *        batches falls back to a threshold of 5
 * @param $batchSize Integer: the size of deletion batches; values below 1
 *        are treated as 1
 *
 * @author Merlijn van Deen <valhallasw@arctus.nl>
 */
function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
        wfWaitForSlaves( $maxLag );

        # Honour the caller's lag threshold between batches, keeping the
        # previous hard-coded value of 5 when none was supplied
        $batchLag = $maxLag ? $maxLag : 5;

        # A zero or non-numeric batch size would make the modulo below divide by zero
        $batchSize = max( 1, intval( $batchSize ) );

        $dbw = wfGetDB( DB_MASTER );

        # Dedicated load balancer so the unbuffered read below runs on its
        # own connection; it is closed again at the end of the function
        $lb = wfGetLBFactory()->newMainLB();
        $dbr = $lb->getConnection( DB_SLAVE );
        $dbr->bufferResults( false );

        $linksTables = array( // table name => page_id field
                'pagelinks' => 'pl_from',
                'imagelinks' => 'il_from',
                'categorylinks' => 'cl_from',
                'templatelinks' => 'tl_from',
                'externallinks' => 'el_from',
        );

        foreach ( $linksTables as $table => $field ) {
                print "Retrieving illegal entries from $table... ";

                // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
                $results = $dbr->select( array( $table, 'page' ),
                              $field,
                              array('page_id' => null ),
                              __METHOD__,
                              'DISTINCT',
                              array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
                );

                $counter = 0;
                $list = array();
                print "0..";

                foreach( $results as $row ) {
                        $counter++;
                        $list[] = $row->$field;
                        if ( ( $counter % $batchSize ) == 0 ) {
                                # Batch is full: wait for the slaves, then flush it
                                wfWaitForSlaves( $batchLag );
                                $dbw->delete( $table, array( $field => $list ), __METHOD__ );

                                print $counter . "..";
                                $list = array();
                        }
                }

                print $counter;
                # Flush the final, partially filled batch
                if ( count( $list ) > 0 ) {
                        $dbw->delete( $table, array( $field => $list ), __METHOD__ );
                }

                print "\n";
        }

        $lb->closeAll();
}
200         
201         $lb->closeAll();
202 }