]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/parser/LinkHolderArray.php
MediaWiki 1.30.2-scripts2
[autoinstalls/mediawiki.git] / includes / parser / LinkHolderArray.php
<?php
/**
 * Holder of replacement pairs for wiki links
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Stores the data behind `<!--LINK ns:id-->` and `<!--IWLINK id-->` placeholder
 * comments and later swaps those placeholders for rendered links (replace()) or for
 * the plain link text (replaceText()).
 *
 * @ingroup Parser
 */
class LinkHolderArray {
	/**
	 * Internal link entries, indexed as [namespace][link ID]. Each entry is an array
	 * with the keys 'title', 'text' and 'pdbk' (prefixed DB key), plus optionally
	 * 'query' and 'selflink'.
	 * @var array
	 */
	public $internals = [];

	/**
	 * Interwiki link entries, indexed by link ID. Same entry layout as $internals.
	 * @var array
	 */
	public $interwikis = [];

	/**
	 * Number of entries added so far; compared against $wgLinkHolderBatchSize by isBig().
	 * @var int
	 */
	public $size = 0;

	/**
	 * @var Parser
	 */
	public $parent;

	/**
	 * ID offset applied by mergeForeignCallback(); only set while mergeForeign() runs.
	 * @var int|null
	 */
	protected $tempIdOffset;

	/**
	 * @param Parser $parent
	 */
	public function __construct( $parent ) {
		$this->parent = $parent;
	}

	/**
	 * Reduce memory usage to reduce the impact of circular references
	 */
	public function __destruct() {
		foreach ( array_keys( get_object_vars( $this ) ) as $name ) {
			unset( $this->$name );
		}
	}

	/**
	 * Don't serialize the parent object, it is big, and not needed when it is
	 * a parameter to mergeForeign(), which is the only application of
	 * serializing at present.
	 *
	 * Compact the titles, only serialize the text form. Note that this removes
	 * the 'title' element from the entries of this (live) object as well.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		foreach ( $this->internals as $ns => $nsLinks ) {
			foreach ( $nsLinks as $id => $entry ) {
				unset( $this->internals[$ns][$id]['title'] );
			}
		}

		foreach ( $this->interwikis as $id => $entry ) {
			unset( $this->interwikis[$id]['title'] );
		}

		return [ 'internals', 'interwikis', 'size' ];
	}

	/**
	 * Recreate the Title objects from the stored prefixed DB keys
	 */
	public function __wakeup() {
		foreach ( $this->internals as $ns => $nsLinks ) {
			foreach ( $nsLinks as $id => $entry ) {
				$this->internals[$ns][$id]['title'] = Title::newFromText( $entry['pdbk'] );
			}
		}

		foreach ( $this->interwikis as $id => $entry ) {
			$this->interwikis[$id]['title'] = Title::newFromText( $entry['pdbk'] );
		}
	}

	/**
	 * Merge another LinkHolderArray into this one. Keys are preserved; on a key
	 * collision the entry already present in this object wins.
	 *
	 * @param LinkHolderArray $other
	 */
	public function merge( $other ) {
		foreach ( $other->internals as $ns => $entries ) {
			$this->size += count( $entries );
			if ( isset( $this->internals[$ns] ) ) {
				$this->internals[$ns] += $entries;
			} else {
				$this->internals[$ns] = $entries;
			}
		}

		// Interwiki holders count towards the size too, exactly as in makeHolder()
		$this->size += count( $other->interwikis );
		$this->interwikis += $other->interwikis;
	}

	/**
	 * Merge a LinkHolderArray from another parser instance into this one. The
	 * keys will not be preserved. Any text which went with the old
	 * LinkHolderArray and needs to work with the new one should be passed in
	 * the $texts array. The strings in this array will have their link holders
	 * converted for use in the destination link holder. The resulting array of
	 * strings will be returned.
	 *
	 * @param LinkHolderArray $other
	 * @param array $texts Array of strings
	 * @return array
	 */
	public function mergeForeign( $other, $texts ) {
		$this->tempIdOffset = $idOffset = $this->parent->nextLinkID();

		// $idOffset itself has just been consumed, so it is the lowest value the
		// "highest used ID" can take. Starting from 0 would rewind the parent's
		// counter whenever $other is empty.
		$maxId = $idOffset;

		# Renumber internal links
		foreach ( $other->internals as $ns => $nsLinks ) {
			foreach ( $nsLinks as $key => $entry ) {
				$newKey = $idOffset + $key;
				$this->internals[$ns][$newKey] = $entry;
				$this->size++;
				$maxId = max( $maxId, $newKey );
			}
		}
		$texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
			[ $this, 'mergeForeignCallback' ], $texts );

		# Renumber interwiki links
		foreach ( $other->interwikis as $key => $entry ) {
			$newKey = $idOffset + $key;
			$this->interwikis[$newKey] = $entry;
			$this->size++;
			$maxId = max( $maxId, $newKey );
		}
		$texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
			[ $this, 'mergeForeignCallback' ], $texts );

		# Set the parent link ID to be beyond the highest used ID
		$this->parent->setLinkID( $maxId + 1 );
		$this->tempIdOffset = null;
		return $texts;
	}

	/**
	 * preg_replace_callback() handler for mergeForeign(): shifts the numeric ID in
	 * a placeholder by the current $tempIdOffset.
	 *
	 * @param array $m Match array: [1] placeholder prefix, [2] old ID, [3] suffix
	 * @return string
	 */
	protected function mergeForeignCallback( $m ) {
		return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
	}

	/**
	 * Get a subset of the current LinkHolderArray which is sufficient to
	 * interpret the given text. Placeholders in the text that have no entry in
	 * this object are ignored.
	 *
	 * @param string $text
	 * @return LinkHolderArray
	 */
	public function getSubArray( $text ) {
		$sub = new LinkHolderArray( $this->parent );

		# Internal links
		if ( preg_match_all( '/<!--LINK (\d+):(\d+)-->/', $text, $found, PREG_SET_ORDER ) ) {
			foreach ( $found as $m ) {
				$ns = $m[1];
				$key = $m[2];
				if ( !isset( $this->internals[$ns][$key] ) ) {
					// Unknown placeholder: nothing to copy
					continue;
				}
				if ( !isset( $sub->internals[$ns][$key] ) ) {
					$sub->size++;
				}
				$sub->internals[$ns][$key] = $this->internals[$ns][$key];
			}
		}

		# Interwiki links
		if ( preg_match_all( '/<!--IWLINK (\d+)-->/', $text, $found, PREG_SET_ORDER ) ) {
			foreach ( $found as $m ) {
				$key = $m[1];
				if ( !isset( $this->interwikis[$key] ) ) {
					continue;
				}
				if ( !isset( $sub->interwikis[$key] ) ) {
					$sub->size++;
				}
				$sub->interwikis[$key] = $this->interwikis[$key];
			}
		}

		return $sub;
	}

	/**
	 * Returns true if the memory requirements of this object are getting large
	 * @return bool
	 */
	public function isBig() {
		global $wgLinkHolderBatchSize;
		return $this->size > $wgLinkHolderBatchSize;
	}

	/**
	 * Clear all stored link holders.
	 * Make sure you don't have any text left using these link holders, before you call this
	 */
	public function clear() {
		$this->internals = [];
		$this->interwikis = [];
		$this->size = 0;
	}

	/**
	 * Make a link placeholder. The text returned can be later resolved to a real link with
	 * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
	 * parsing of interwiki links, and secondly to allow all existence checks and
	 * article length checks (for stub links) to be bundled into a single query.
	 *
	 * @param Title $nt
	 * @param string $text
	 * @param array $query [optional]
	 * @param string $trail [optional]
	 * @param string $prefix [optional]
	 * @return string
	 */
	public function makeHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
		if ( !is_object( $nt ) ) {
			# Fail gracefully
			return "<!-- ERROR -->{$prefix}{$text}{$trail}";
		}

		# Separate the link trail from the rest of the link
		list( $inside, $trail ) = Linker::splitTrail( $trail );

		$entry = [
			'title' => $nt,
			'text' => $prefix . $text . $inside,
			'pdbk' => $nt->getPrefixedDBkey(),
		];
		if ( $query !== [] ) {
			$entry['query'] = $query;
		}

		// Use a globally unique ID to keep the objects mergable
		$key = $this->parent->nextLinkID();
		if ( $nt->isExternal() ) {
			$this->interwikis[$key] = $entry;
			$holder = "<!--IWLINK $key-->{$trail}";
		} else {
			$ns = $nt->getNamespace();
			$this->internals[$ns][$key] = $entry;
			$holder = "<!--LINK $ns:$key-->{$trail}";
		}
		$this->size++;

		return $holder;
	}

	/**
	 * Replace <!--LINK--> and <!--IWLINK--> link placeholders with actual links,
	 * in the buffer
	 *
	 * @param string &$text
	 */
	public function replace( &$text ) {
		$this->replaceInternal( $text );
		$this->replaceInterwiki( $text );
	}

	/**
	 * Replace internal links.
	 *
	 * Titles that are not resolved by the link cache are looked up in the page
	 * table with one batched query; afterwards every <!--LINK ns:id-->
	 * placeholder that has an entry is replaced by its rendered link.
	 *
	 * @param string &$text
	 */
	protected function replaceInternal( &$text ) {
		if ( !$this->internals ) {
			return;
		}

		global $wgContLang;

		$colours = [];
		$linkCache = LinkCache::singleton();
		$output = $this->parent->getOutput();
		$linkRenderer = $this->parent->getLinkRenderer();

		# Sort by namespace
		ksort( $this->internals );

		$linkcolour_ids = [];

		# Generate query
		$lb = new LinkBatch();
		$lb->setCaller( __METHOD__ );

		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $entry ) {
				/** @var Title $title */
				$title = $entry['title'];
				$pdbk = $entry['pdbk'];

				# Skip invalid entries.
				# Result will be ugly, but prevents crash.
				if ( is_null( $title ) ) {
					continue;
				}

				# Check if it's a static known link, e.g. interwiki
				if ( $title->isAlwaysKnown() ) {
					$colours[$pdbk] = '';
				} elseif ( $ns == NS_SPECIAL ) {
					$colours[$pdbk] = 'new';
				} else {
					$id = $linkCache->getGoodLinkID( $pdbk );
					if ( $id != 0 ) {
						$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
						$output->addLink( $title, $id );
						$linkcolour_ids[$id] = $pdbk;
					} elseif ( $linkCache->isBadLink( $pdbk ) ) {
						$colours[$pdbk] = 'new';
					} else {
						# Not in the link cache, add it to the query
						$lb->addObj( $title );
					}
				}
			}
		}

		if ( !$lb->isEmpty() ) {
			// Only ask for a DB handle when there is something to look up
			$dbr = wfGetDB( DB_REPLICA );
			$fields = array_merge(
				LinkCache::getSelectFields(),
				[ 'page_namespace', 'page_title' ]
			);

			$res = $dbr->select(
				'page',
				$fields,
				$lb->constructSet( 'page', $dbr ),
				__METHOD__
			);

			# Fetch data and form into an associative array
			# non-existent = broken
			foreach ( $res as $s ) {
				$title = Title::makeTitle( $s->page_namespace, $s->page_title );
				$pdbk = $title->getPrefixedDBkey();
				$linkCache->addGoodLinkObjFromRow( $title, $s );
				$output->addLink( $title, $s->page_id );
				$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
				// add id to the extension todolist
				$linkcolour_ids[$s->page_id] = $pdbk;
			}
			unset( $res );
		}

		if ( count( $linkcolour_ids ) ) {
			// pass an array of page_ids to an extension
			Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
		}

		# Do a second query for different language variants of links and categories
		if ( $wgContLang->hasVariants() ) {
			$this->doVariants( $colours );
		}

		# Construct search and replace arrays
		$replacePairs = [];
		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $index => $entry ) {
				$pdbk = $entry['pdbk'];
				$title = $entry['title'];

				# Same guard as in the batch-building loop above: an entry without
				# a title cannot be rendered, so its placeholder is left in place.
				if ( is_null( $title ) ) {
					continue;
				}

				$query = isset( $entry['query'] ) ? $entry['query'] : [];
				$searchkey = "<!--LINK $ns:$index-->";
				$displayText = $entry['text'];

				if ( isset( $entry['selflink'] ) ) {
					$replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
					continue;
				}

				if ( $displayText === '' ) {
					$displayText = null;
				} else {
					$displayText = new HtmlArmor( $displayText );
				}

				# Anything that was not found by any of the lookups is a broken link
				if ( !isset( $colours[$pdbk] ) ) {
					$colours[$pdbk] = 'new';
				}

				$attribs = [];
				if ( $colours[$pdbk] == 'new' ) {
					$linkCache->addBadLinkObj( $title );
					$output->addLink( $title, 0 );
					$link = $linkRenderer->makeBrokenLink(
						$title, $displayText, $attribs, $query
					);
				} else {
					$link = $linkRenderer->makePreloadedLink(
						$title, $displayText, $colours[$pdbk], $attribs, $query
					);
				}

				$replacePairs[$searchkey] = $link;
			}
		}
		$replacer = new HashtableReplacer( $replacePairs, 1 );

		# Do the thing
		$text = preg_replace_callback(
			'/(<!--LINK .*?-->)/',
			$replacer->cb(),
			$text
		);
	}

	/**
	 * Replace interwiki links
	 * @param string &$text
	 */
	protected function replaceInterwiki( &$text ) {
		if ( empty( $this->interwikis ) ) {
			return;
		}

		# Make interwiki link HTML
		$output = $this->parent->getOutput();
		$linkRenderer = $this->parent->getLinkRenderer();
		$replacePairs = [];
		foreach ( $this->interwikis as $key => $link ) {
			$replacePairs[$key] = $linkRenderer->makeLink(
				$link['title'],
				new HtmlArmor( $link['text'] )
			);
			$output->addInterwikiLink( $link['title'] );
		}
		$replacer = new HashtableReplacer( $replacePairs, 1 );

		$text = preg_replace_callback(
			'/<!--IWLINK (.*?)-->/',
			$replacer->cb(),
			$text
		);
	}

	/**
	 * Modify $this->internals and $colours according to language variant linking rules.
	 *
	 * Links that are still unresolved (or 'new') are converted to every language
	 * variant; variants that exist as pages take over the link holder. The
	 * categories of the parser output are treated similarly: a category that does
	 * not exist is replaced by a variant of it that does.
	 *
	 * @param array &$colours CSS classes indexed by prefixed DB key
	 */
	protected function doVariants( &$colours ) {
		global $wgContLang;
		$linkBatch = new LinkBatch();
		$variantMap = []; // maps $pdbkey_Variant => $keys (of link holders)
		$output = $this->parent->getOutput();
		$linkCache = LinkCache::singleton();
		$titlesToBeConverted = '';
		$titlesAttrs = [];

		// Concatenate titles to a single string, thus we only need auto convert the
		// single string to all variants. This would improve parser's performance
		// significantly.
		foreach ( $this->internals as $ns => $entries ) {
			if ( $ns == NS_SPECIAL ) {
				continue;
			}
			foreach ( $entries as $index => $entry ) {
				$title = $entry['title'];
				// Entries without a title were skipped by replaceInternal() as well
				if ( is_null( $title ) ) {
					continue;
				}
				$pdbk = $entry['pdbk'];
				// we only deal with new links (in its first query)
				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
					$titlesAttrs[] = [ $index, $title ];
					// separate titles with \0 because it would never appears
					// in a valid title
					$titlesToBeConverted .= $title->getText() . "\0";
				}
			}
		}

		// Now do the conversion and explode string to text of titles
		$titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
		$allVariantsName = array_keys( $titlesAllVariants );
		foreach ( $allVariantsName as $variantName ) {
			$titlesAllVariants[$variantName] = explode( "\0", $titlesAllVariants[$variantName] );
		}

		// Then add variants of links to link batch
		$parentTitle = $this->parent->getTitle();
		foreach ( $titlesAttrs as $i => $attrs ) {
			/** @var Title $title */
			list( $index, $title ) = $attrs;
			$ns = $title->getNamespace();
			$text = $title->getText();

			foreach ( $allVariantsName as $variantName ) {
				// Be defensive in case the converted string did not split into
				// the same number of titles that went in.
				if ( !isset( $titlesAllVariants[$variantName][$i] ) ) {
					continue;
				}
				$textVariant = $titlesAllVariants[$variantName][$i];
				if ( $textVariant === $text ) {
					continue;
				}

				$variantTitle = Title::makeTitle( $ns, $textVariant );

				// Self-link checking for mixed/different variant titles. At this point, we
				// already know the exact title does not exist, so the link cannot be to a
				// variant of the current title that exists as a separate page.
				if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
					$this->internals[$ns][$index]['selflink'] = true;
					continue 2;
				}

				$linkBatch->addObj( $variantTitle );
				$variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
			}
		}

		// process categories, check if a category exists in some variant
		$categoryMap = []; // maps $category_variant => $category (dbkeys)
		$varCategories = []; // category replacements oldDBkey => newDBkey
		foreach ( $output->getCategoryLinks() as $category ) {
			$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
			if ( is_null( $categoryTitle ) ) {
				// Cannot check existence of an invalid title further down
				continue;
			}
			$linkBatch->addObj( $categoryTitle );
			$variants = $wgContLang->autoConvertToAllVariants( $category );
			foreach ( $variants as $variant ) {
				if ( $variant === $category ) {
					continue;
				}
				$variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
				if ( is_null( $variantTitle ) ) {
					continue;
				}
				$linkBatch->addObj( $variantTitle );
				$categoryMap[$variant] = [ $category, $categoryTitle ];
			}
		}

		if ( $linkBatch->isEmpty() ) {
			return;
		}

		// construct query
		$dbr = wfGetDB( DB_REPLICA );
		$fields = array_merge(
			LinkCache::getSelectFields(),
			[ 'page_namespace', 'page_title' ]
		);

		$varRes = $dbr->select( 'page',
			$fields,
			$linkBatch->constructSet( 'page', $dbr ),
			__METHOD__
		);

		$linkcolour_ids = [];
		$linkRenderer = $this->parent->getLinkRenderer();

		// for each found variants, figure out link holders and replace
		foreach ( $varRes as $s ) {
			$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
			$varPdbk = $variantTitle->getPrefixedDBkey();
			$vardbk = $variantTitle->getDBkey();

			$holderKeys = [];
			if ( isset( $variantMap[$varPdbk] ) ) {
				$holderKeys = $variantMap[$varPdbk];
				$linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
				$output->addLink( $variantTitle, $s->page_id );
			}

			// loop over link holders
			foreach ( $holderKeys as $key ) {
				list( $ns, $index ) = explode( ':', $key, 2 );
				$entry =& $this->internals[$ns][$index];
				$pdbk = $entry['pdbk'];

				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
					// found link in some of the variants, replace the link holder data
					$entry['title'] = $variantTitle;
					$entry['pdbk'] = $varPdbk;

					// set pdbk and colour; the ID map must point at the key the
					// colour is stored under, i.e. the variant's key
					$colours[$varPdbk] = $linkRenderer->getLinkClasses( $variantTitle );
					$linkcolour_ids[$s->page_id] = $varPdbk;
				}
			}
			// Drop the reference so later writes to $entry cannot touch the holder
			unset( $entry );

			// check if the object is a variant of a category
			if ( isset( $categoryMap[$vardbk] ) ) {
				list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
				if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
					$varCategories[$oldkey] = $vardbk;
				}
			}
		}
		Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );

		// rebuild the categories in original order (if there are replacements)
		if ( count( $varCategories ) > 0 ) {
			$newCats = [];
			$originalCats = $output->getCategories();
			foreach ( $originalCats as $cat => $sortkey ) {
				// make the replacement
				if ( array_key_exists( $cat, $varCategories ) ) {
					$newCats[$varCategories[$cat]] = $sortkey;
				} else {
					$newCats[$cat] = $sortkey;
				}
			}
			$output->setCategoryLinks( $newCats );
		}
	}

	/**
	 * Replace <!--LINK--> and <!--IWLINK--> link placeholders with plain text of
	 * links (not HTML-formatted).
	 *
	 * @param string $text
	 * @return string
	 */
	public function replaceText( $text ) {
		return preg_replace_callback(
			'/<!--(LINK|IWLINK) (.*?)-->/',
			[ $this, 'replaceTextCallback' ],
			$text
		);
	}

	/**
	 * Callback for replaceText()
	 *
	 * @param array $matches [0] whole placeholder, [1] 'LINK' or 'IWLINK', [2] key
	 * @return string The stored link text, or the placeholder itself when the key
	 *  is unknown or malformed
	 * @private
	 */
	public function replaceTextCallback( $matches ) {
		$type = $matches[1];
		$key = $matches[2];
		if ( $type == 'LINK' ) {
			// A well-formed key is "ns:index"; anything else cannot match an entry
			$parts = explode( ':', $key, 2 );
			if ( count( $parts ) === 2 ) {
				list( $ns, $index ) = $parts;
				if ( isset( $this->internals[$ns][$index]['text'] ) ) {
					return $this->internals[$ns][$index]['text'];
				}
			}
		} elseif ( $type == 'IWLINK' ) {
			if ( isset( $this->interwikis[$key]['text'] ) ) {
				return $this->interwikis[$key]['text'];
			}
		}
		return $matches[0];
	}
}