X-Git-Url: https://scripts.mit.edu/gitweb/autoinstallsdev/mediawiki.git/blobdiff_plain/8989532d3de45b196373107c7a812a68ac0ff2d9..d75ce11339b35963b5f8c3d53190819c1c025716:/maintenance/refreshLinks.inc diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc deleted file mode 100644 index b7d531c7..00000000 --- a/maintenance/refreshLinks.inc +++ /dev/null @@ -1,202 +0,0 @@ -setOption('math', MW_MATH_SOURCE); - - # Don't generate extension images (e.g. Timeline) - if( method_exists( $wgParser, "clearTagHooks" ) ) { - $wgParser->clearTagHooks(); - } - - # Don't use HTML tidy - $wgUseTidy = false; - - $what = $redirectsOnly ? "redirects" : "links"; - - if( $oldRedirectsOnly ) { - # This entire code path is cut-and-pasted from below. Hurrah. - $res = $dbr->query( - "SELECT page_id ". - "FROM page ". - "LEFT JOIN redirect ON page_id=rd_from ". - "WHERE page_is_redirect=1 AND rd_from IS NULL AND ". - ($end == 0 ? "page_id >= $start" - : "page_id BETWEEN $start AND $end"), - $fname - ); - $num = $dbr->numRows( $res ); - print "Refreshing $num old redirects from $start...\n"; - - while( $row = $dbr->fetchObject( $res ) ) { - if ( !( ++$i % $reportingInterval ) ) { - print "$i\n"; - wfWaitForSlaves( $maxLag ); - } - fixRedirect( $row->page_id ); - } - } elseif( $newOnly ) { - print "Refreshing $what from "; - $res = $dbr->select( 'page', - array( 'page_id' ), - array( - 'page_is_new' => 1, - "page_id >= $start" ), - $fname - ); - $num = $dbr->numRows( $res ); - print "$num new articles...\n"; - - $i = 0; - while ( $row = $dbr->fetchObject( $res ) ) { - if ( !( ++$i % $reportingInterval ) ) { - print "$i\n"; - wfWaitForSlaves( $maxLag ); - } - if($redirectsOnly) - fixRedirect( $row->page_id ); - else - fixLinksFromArticle( $row->page_id ); - } - } else { - print "Refreshing $what table.\n"; - if ( !$end ) { - $end = $dbr->selectField( 'page', 'max(page_id)', false ); - } - print("Starting from page_id $start of $end.\n"); - - for ($id = $start; $id <= $end; $id++) { - - if ( !($id % $reportingInterval) ) { - print "$id\n"; - wfWaitForSlaves( $maxLag ); - } - if($redirectsOnly) - fixRedirect( $id ); - else - fixLinksFromArticle( $id ); - } - } -} - -function fixRedirect( $id ){ - global $wgTitle, $wgArticle; - - $wgTitle = Title::newFromID( $id ); - $dbw = wfGetDB( DB_MASTER ); - - if ( is_null( $wgTitle ) ) { - return; - } - $wgArticle = new Article($wgTitle); - - $rt = $wgArticle->followRedirect(); - - if($rt == false || !is_object($rt)) - return; - - $wgArticle->updateRedirectOn($dbw,$rt); -} - -function fixLinksFromArticle( $id ) { - global $wgTitle, $wgParser; - - $wgTitle = Title::newFromID( $id ); - $dbw = wfGetDB( DB_MASTER ); - - $linkCache =& LinkCache::singleton(); - $linkCache->clear(); - - if ( is_null( $wgTitle ) ) { - return; - } - $dbw->begin(); - - $revision = Revision::newFromTitle( $wgTitle ); - if ( !$revision ) { - return; - } - - $options = new ParserOptions; - $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() ); - $update = new LinksUpdate( $wgTitle, $parserOutput, false ); - $update->doUpdate(); - $dbw->immediateCommit(); -} - -/* - * Removes non-existing links from pages from pagelinks, imagelinks, - * categorylinks, templatelinks and externallinks tables. - * - * @param $maxLag - * @param $batchSize The size of deletion batches - * - * @author Merlijn van Deen - */ -function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { - wfWaitForSlaves( $maxLag ); - - $dbw = wfGetDB( DB_MASTER ); - - $lb = wfGetLBFactory()->newMainLB(); - $dbr = $lb->getConnection( DB_SLAVE ); - $dbr->bufferResults( false ); - - $linksTables = array( // table name => page_id field - 'pagelinks' => 'pl_from', - 'imagelinks' => 'il_from', - 'categorylinks' => 'cl_from', - 'templatelinks' => 'tl_from', - 'externallinks' => 'el_from', - ); - - foreach ( $linksTables as $table => $field ) { - print "Retrieving illegal entries from $table... "; - - // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL; - $results = $dbr->select( array( $table, 'page' ), - $field, - array('page_id' => null ), - __METHOD__, - 'DISTINCT', - array( 'page' => array( 'LEFT JOIN', "$field=page_id")) - ); - - $counter = 0; - $list = array(); - print "0.."; - - foreach( $results as $row ) { - $counter++; - $list[] = $row->$field; - if ( ( $counter % $batchSize ) == 0 ) { - wfWaitForSlaves(5); - $dbw->delete( $table, array( $field => $list ), __METHOD__ ); - - print $counter . ".."; - $list = array(); - } - } - - print $counter; - if (count($list) > 0) { - $dbw->delete( $table, array( $field => $list ), __METHOD__ ); - } - - print "\n"; - } - - $lb->closeAll(); -}