4 * A parser extension that adds two tags, <ref> and <references> for adding
10 * @link http://www.mediawiki.org/wiki/Extension:Cite/Cite.php
12 * <cite> definition in HTML
13 * @link http://www.w3.org/TR/html4/struct/text.html#edef-CITE
15 * <cite> definition in XHTML 2.0
16 * @link http://www.w3.org/TR/2005/WD-xhtml2-20050527/mod-text.html#edef_text_cite
18 * @bug https://phabricator.wikimedia.org/T6579
20 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
21 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
22 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
26 * WARNING: MediaWiki core hardcodes this class name to check if the
27 * Cite extension is installed. See T89151.
34 const DEFAULT_GROUP = '';
37 * Maximum storage capacity for pp_value field of page_props table
38 * @todo Find a way to retrieve this information from the DBAL
40 const MAX_STORAGE_LENGTH = 65535; // Size of MySQL 'blob' field
43 * Key used for storage in parser output's ExtensionData and ObjectCache
45 const EXT_DATA_KEY = 'Cite:References';
48 * Version number in case we change the data structure in the future
50 const DATA_VERSION_NUMBER = 1;
53 * Cache duration set when parsing a page with references
55 const CACHE_DURATION_ONPARSE = 3600; // 1 hour
58 * Cache duration set when fetching references from db
60 const CACHE_DURATION_ONFETCH = 18000; // 5 hours
63 * Data structure representing <ref> input, in the format of:
66 * 'user supplied' => [
67 * 'text' => 'user supplied reference & key',
68 * 'count' => 1, // occurs twice
69 * 'number' => 1, // The first reference, we want
70 * // all occurrences of it to
71 * // use the same number
73 * 0 => 'Anonymous reference',
74 * 1 => 'Another anonymous reference',
76 * 'text' => 'this one occurs once'
85 * * PHP's datastructures are guaranteed to be returned in the
86 * order that things are inserted into them (unless you mess
88 * * User supplied keys can't be integers, therefore avoiding
89 * conflict with anonymous keys
96 * Count for user displayed output (ref[1], ref[2], ...)
100 private $mOutCnt = 0;
105 private $mGroupCnt = [];
108 * Counter to track the total number of (useful) calls to either the
109 * ref or references tag hook
113 private $mCallCnt = 0;
116 * The backlinks, in order, to pass as $3 to
117 * 'cite_references_link_many_format', defined in
118 * 'cite_references_link_many_format_backlink_labels'
122 private $mBacklinkLabels;
125 * The links to use per group, in order.
129 private $mLinkLabels = [];
137 * True when the ParserAfterParse hook has been called.
138 * Used to avoid doing anything in ParserBeforeTidy.
142 private $mHaveAfterParse = false;
145 * True when a <ref> tag is being processed.
146 * Used to avoid infinite recursion
150 public $mInCite = false;
153 * True when a <references> tag is being processed.
154 * Used to detect the use of <references> to define refs
158 public $mInReferences = false;
161 * Error stack used when defining refs in <references>
165 private $mReferencesErrors = [];
168 * Group used when in <references> block
172 private $mReferencesGroup = '';
176 * Used to cleanup out of sequence ref calls created by #tag
177 * See description of function rollbackRef.
181 private $mRefCallStack = [];
186 private $mBumpRefData = false;
189 * Have we installed ourselves into $wgHooks yet?
192 private static $hooksInstalled = false;
195 * Callback function for <ref>
197 * @param string|null $str Raw content of the <ref> tag.
198 * @param string[] $argv Arguments
199 * @param Parser $parser
200 * @param PPFrame $frame
204 public function ref( $str, array $argv, Parser $parser, PPFrame $frame ) {
205 if ( $this->mInCite ) {
206 return htmlspecialchars( "<ref>$str</ref>" );
210 $this->mInCite = true;
212 $ret = $this->guardedRef( $str, $argv, $parser );
214 $this->mInCite = false;
216 $parserOutput = $parser->getOutput();
217 $parserOutput->addModules( 'ext.cite.a11y' );
218 $parserOutput->addModuleStyles( 'ext.cite.styles' );
220 if ( is_callable( [ $frame, 'setVolatile' ] ) ) {
221 $frame->setVolatile();
224 // new <ref> tag, we may need to bump the ref data counter
225 // to avoid overwriting a previous group
226 $this->mBumpRefData = true;
232 * @param string|null $str Raw content of the <ref> tag.
233 * @param string[] $argv Arguments
234 * @param Parser $parser
235 * @param string $default_group
240 private function guardedRef(
244 $default_group = self::DEFAULT_GROUP
246 $this->mParser = $parser;
248 # The key here is the "name" attribute.
249 list( $key, $group, $follow ) = $this->refArg( $argv );
251 # Split these into groups.
252 if ( $group === null ) {
253 if ( $this->mInReferences ) {
254 $group = $this->mReferencesGroup;
256 $group = $default_group;
261 * This section deals with constructions of the form
264 * <ref name="foo"> BAR </ref>
267 if ( $this->mInReferences ) {
268 $isSectionPreview = $parser->getOptions()->getIsSectionPreview();
269 if ( $group != $this->mReferencesGroup ) {
270 # <ref> and <references> have conflicting group attributes.
271 $this->mReferencesErrors[] =
272 $this->error( 'cite_error_references_group_mismatch', htmlspecialchars( $group ) );
273 } elseif ( $str !== '' ) {
274 if ( !$isSectionPreview && !isset( $this->mRefs[$group] ) ) {
275 # Called with group attribute not defined in text.
276 $this->mReferencesErrors[] =
277 $this->error( 'cite_error_references_missing_group', htmlspecialchars( $group ) );
278 } elseif ( $key === null || $key === '' ) {
279 # <ref> calls inside <references> must be named
280 $this->mReferencesErrors[] =
281 $this->error( 'cite_error_references_no_key' );
282 } elseif ( !$isSectionPreview && !isset( $this->mRefs[$group][$key] ) ) {
283 # Called with name attribute not defined in text.
284 $this->mReferencesErrors[] =
285 $this->error( 'cite_error_references_missing_key', $key );
288 isset( $this->mRefs[$group][$key]['text'] ) &&
289 $str !== $this->mRefs[$group][$key]['text']
291 // two refs with same key and different content
292 // add error message to the original ref
293 $this->mRefs[$group][$key]['text'] .= ' ' . $this->error(
294 'cite_error_references_duplicate_key', $key, 'noparse'
297 # Assign the text to corresponding ref
298 $this->mRefs[$group][$key]['text'] = $str;
302 # <ref> called in <references> has no content.
303 $this->mReferencesErrors[] =
304 $this->error( 'cite_error_empty_references_define', $key );
310 # <ref ...></ref>. This construct is invalid if
311 # it's a contentful ref, but OK if it's a named duplicate and should
312 # be equivalent to <ref ... />, for compatibility with #tag.
313 if ( is_string( $key ) && $key !== '' ) {
316 $this->mRefCallStack[] = false;
318 return $this->error( 'cite_error_ref_no_input' );
322 if ( $key === false ) {
323 # TODO: Comment this case; what does this condition mean?
324 $this->mRefCallStack[] = false;
325 return $this->error( 'cite_error_ref_too_many_keys' );
328 if ( $str === null && $key === null ) {
329 # Something like <ref />; this makes no sense.
330 $this->mRefCallStack[] = false;
331 return $this->error( 'cite_error_ref_no_key' );
334 if ( preg_match( '/^[0-9]+$/', $key ) || preg_match( '/^[0-9]+$/', $follow ) ) {
335 # Numeric names mess up the resulting id's, potentially produ-
336 # cing duplicate id's in the XHTML. The Right Thing To Do
337 # would be to mangle them, but it's not really high-priority
338 # (and would produce weird id's anyway).
340 $this->mRefCallStack[] = false;
341 return $this->error( 'cite_error_ref_numeric_key' );
346 preg_replace( '#<([^ ]+?).*?>.*?</\\1 *>|<!--.*?-->#', '', $str )
348 # (bug T8199) This most likely implies that someone left off the
349 # closing </ref> tag, which will cause the entire article to be
350 # eaten up until the next <ref>. So we bail out early instead.
351 # The fancy regex above first tries chopping out anything that
352 # looks like a comment or SGML tag, which is a crude way to avoid
353 # false alarms for <nowiki>, <pre>, etc.
355 # Possible improvement: print the warning, followed by the contents
356 # of the <ref> tag. This way no part of the article will be eaten
359 $this->mRefCallStack[] = false;
360 return $this->error( 'cite_error_included_ref' );
363 if ( is_string( $key ) || is_string( $str ) ) {
364 # We don't care about the content: if the key exists, the ref
365 # is presumptively valid. Either it stores a new ref, or re-
366 # fers to an existing one. If it refers to a nonexistent ref,
367 # we'll figure that out later. Likewise it's definitely valid
368 # if there's any content, regardless of key.
370 return $this->stack( $str, $key, $group, $follow, $argv );
373 # Not clear how we could get here, but something is probably
374 # wrong with the types. Let's fail fast.
375 throw new Exception( 'Invalid $str and/or $key: ' . serialize( [ $str, $key ] ) );
379 * Parse the arguments to the <ref> tag
381 * "name" : Key of the reference.
382 * "group" : Group to which it belongs. Needs to be passed to <references /> too.
383 * "follow" : If the current reference is the continuation of another, key of that reference.
386 * @param string[] $argv The argument vector
387 * @return mixed false on invalid input, a string on valid
388 * input and null on no input
390 private function refArg( array $argv ) {
391 $cnt = count( $argv );
397 // There should only be one key or follow parameter, and one group parameter
398 // FIXME : this looks inconsistent, it should probably return a tuple
400 } elseif ( $cnt >= 1 ) {
401 if ( isset( $argv['name'] ) && isset( $argv['follow'] ) ) {
402 return [ false, false, false ];
404 if ( isset( $argv['name'] ) ) {
406 $key = Sanitizer::escapeId( $argv['name'], 'noninitial' );
407 unset( $argv['name'] );
410 if ( isset( $argv['follow'] ) ) {
412 $follow = Sanitizer::escapeId( $argv['follow'], 'noninitial' );
413 unset( $argv['follow'] );
416 if ( isset( $argv['group'] ) ) {
418 $group = $argv['group'];
419 unset( $argv['group'] );
424 return [ $key, $group, $follow ];
427 return [ false, false, false ];
431 return [ null, $group, false ];
436 * Populate $this->mRefs based on input and arguments to <ref>
438 * @param string $str Input from the <ref> tag
439 * @param string|null $key Argument to the <ref> tag as returned by $this->refArg()
440 * @param string $group
441 * @param string|null $follow
442 * @param string[] $call
447 private function stack( $str, $key = null, $group, $follow, array $call ) {
448 if ( !isset( $this->mRefs[$group] ) ) {
449 $this->mRefs[$group] = [];
451 if ( !isset( $this->mGroupCnt[$group] ) ) {
452 $this->mGroupCnt[$group] = 0;
454 if ( $follow != null ) {
455 if ( isset( $this->mRefs[$group][$follow] ) && is_array( $this->mRefs[$group][$follow] ) ) {
456 // add text to the note that is being followed
457 $this->mRefs[$group][$follow]['text'] .= ' ' . $str;
459 // insert part of note at the beginning of the group
460 $groupsCount = count( $this->mRefs[$group] );
461 for ( $k = 0; $k < $groupsCount; $k++ ) {
462 if ( !isset( $this->mRefs[$group][$k]['follow'] ) ) {
466 array_splice( $this->mRefs[$group], $k, 0, [ [
469 'key' => ++$this->mOutCnt,
472 array_splice( $this->mRefCallStack, $k, 0,
473 [ [ 'new', $call, $str, $key, $group, $this->mOutCnt ] ] );
475 // return an empty string : this is not a reference
479 if ( $key === null ) {
481 // $this->mRefs[$group][] = $str;
482 $this->mRefs[$group][] = [
485 'key' => ++$this->mOutCnt
487 $this->mRefCallStack[] = [ 'new', $call, $str, $key, $group, $this->mOutCnt ];
489 return $this->linkRef( $group, $this->mOutCnt );
491 if ( !is_string( $key ) ) {
492 throw new Exception( 'Invalid stack key: ' . serialize( $key ) );
496 if ( !isset( $this->mRefs[$group][$key] ) || !is_array( $this->mRefs[$group][$key] ) ) {
498 $this->mRefs[$group][$key] = [
501 'key' => ++$this->mOutCnt,
502 'number' => ++$this->mGroupCnt[$group]
504 $this->mRefCallStack[] = [ 'new', $call, $str, $key, $group, $this->mOutCnt ];
506 return $this->linkRef(
509 $this->mRefs[$group][$key]['key'] . "-" . $this->mRefs[$group][$key]['count'],
510 $this->mRefs[$group][$key]['number'],
511 "-" . $this->mRefs[$group][$key]['key']
515 // We've been here before
516 if ( $this->mRefs[$group][$key]['text'] === null && $str !== '' ) {
517 // If no text found before, use this text
518 $this->mRefs[$group][$key]['text'] = $str;
519 $this->mRefCallStack[] = [ 'assign', $call, $str, $key, $group,
520 $this->mRefs[$group][$key]['key'] ];
522 if ( $str != null && $str !== '' && $str !== $this->mRefs[$group][$key]['text'] ) {
523 // two refs with same key and different content
524 // add error message to the original ref
525 $this->mRefs[$group][$key]['text'] .= ' ' . $this->error(
526 'cite_error_references_duplicate_key', $key, 'noparse'
529 $this->mRefCallStack[] = [ 'increment', $call, $str, $key, $group,
530 $this->mRefs[$group][$key]['key'] ];
532 return $this->linkRef(
535 $this->mRefs[$group][$key]['key'] . "-" . ++$this->mRefs[$group][$key]['count'],
536 $this->mRefs[$group][$key]['number'],
537 "-" . $this->mRefs[$group][$key]['key']
542 * Partially undoes the effect of calls to stack()
544 * Called by guardedReferences()
546 * The option to define <ref> within <references> makes the
547 * behavior of <ref> context dependent. This is normally fine
548 * but certain operations (especially #tag) lead to out-of-order
549 * parser evaluation with the <ref> tags being processed before
550 * their containing <references> element is read. This leads to
551 * stack corruption that this function works to fix.
553 * This function is not a total rollback since some internal
554 * counters remain incremented. Doing so prevents accidentally
555 * corrupting certain links.
557 * @param string $type
558 * @param string|null $key
559 * @param string $group
562 private function rollbackRef( $type, $key, $group, $index ) {
563 if ( !isset( $this->mRefs[$group] ) ) {
567 if ( $key === null ) {
568 foreach ( $this->mRefs[$group] as $k => $v ) {
569 if ( $this->mRefs[$group][$k]['key'] === $index ) {
576 // Sanity checks that specified element exists.
577 if ( $key === null ) {
580 if ( !isset( $this->mRefs[$group][$key] ) ) {
583 if ( $this->mRefs[$group][$key]['key'] != $index ) {
589 # Rollback the addition of new elements to the stack.
590 unset( $this->mRefs[$group][$key] );
591 if ( $this->mRefs[$group] === [] ) {
592 unset( $this->mRefs[$group] );
593 unset( $this->mGroupCnt[$group] );
597 # Rollback assignment of text to pre-existing elements.
598 $this->mRefs[$group][$key]['text'] = null;
599 # continue without break
601 # Rollback increase in named ref occurrences.
602 $this->mRefs[$group][$key]['count']--;
608 * Callback function for <references>
610 * @param string|null $str Raw content of the <references> tag.
611 * @param string[] $argv Arguments
612 * @param Parser $parser
613 * @param PPFrame $frame
617 public function references( $str, array $argv, Parser $parser, PPFrame $frame ) {
618 if ( $this->mInCite || $this->mInReferences ) {
619 if ( is_null( $str ) ) {
620 return htmlspecialchars( "<references/>" );
622 return htmlspecialchars( "<references>$str</references>" );
625 $this->mInReferences = true;
626 $ret = $this->guardedReferences( $str, $argv, $parser );
627 $this->mInReferences = false;
628 if ( is_callable( [ $frame, 'setVolatile' ] ) ) {
629 $frame->setVolatile();
635 * @param string|null $str Raw content of the <references> tag.
636 * @param string[] $argv
637 * @param Parser $parser
638 * @param string $group
642 private function guardedReferences(
646 $group = self::DEFAULT_GROUP
648 global $wgCiteResponsiveReferences;
650 $this->mParser = $parser;
652 if ( isset( $argv['group'] ) ) {
653 $group = $argv['group'];
654 unset( $argv['group'] );
657 if ( strval( $str ) !== '' ) {
658 $this->mReferencesGroup = $group;
660 # Detect whether we were sent already rendered <ref>s.
661 # Mostly a side effect of using #tag to call references.
662 # The following assumes that the parsed <ref>s sent within
663 # the <references> block were the most recent calls to
664 # <ref>. This assumption is true for all known use cases,
665 # but not strictly enforced by the parser. It is possible
666 # that some unusual combination of #tag, <references> and
667 # conditional parser functions could be created that would
668 # lead to malformed references here.
669 $count = substr_count( $str, Parser::MARKER_PREFIX . "-ref-" );
672 # Undo effects of calling <ref> while unaware of containing <references>
673 for ( $i = 1; $i <= $count; $i++ ) {
674 if ( !$this->mRefCallStack ) {
678 $call = array_pop( $this->mRefCallStack );
679 $redoStack[] = $call;
680 if ( $call !== false ) {
681 list( $type, $ref_argv, $ref_str,
682 $ref_key, $ref_group, $ref_index ) = $call;
683 $this->rollbackRef( $type, $ref_key, $ref_group, $ref_index );
687 # Rerun <ref> call now that mInReferences is set.
688 for ( $i = count( $redoStack ) - 1; $i >= 0; $i-- ) {
689 $call = $redoStack[$i];
690 if ( $call !== false ) {
691 list( $type, $ref_argv, $ref_str,
692 $ref_key, $ref_group, $ref_index ) = $call;
693 $this->guardedRef( $ref_str, $ref_argv, $parser );
697 # Parse $str to process any unparsed <ref> tags.
698 $parser->recursiveTagParse( $str );
701 $this->mRefCallStack = [];
704 if ( isset( $argv['responsive'] ) ) {
705 $responsive = $argv['responsive'] !== '0';
706 unset( $argv['responsive'] );
708 $responsive = $wgCiteResponsiveReferences;
711 // There are remaining parameters we don't recognise
713 return $this->error( 'cite_error_references_invalid_parameters' );
716 $s = $this->referencesFormat( $group, $responsive );
718 # Append errors generated while processing <references>
719 if ( $this->mReferencesErrors ) {
720 $s .= "\n" . implode( "<br />\n", $this->mReferencesErrors );
721 $this->mReferencesErrors = [];
727 * Make output to be returned from the references() function
729 * @param string $group
730 * @param bool $responsive
731 * @return string HTML ready for output
733 private function referencesFormat( $group, $responsive ) {
734 if ( !$this->mRefs || !isset( $this->mRefs[$group] ) ) {
739 foreach ( $this->mRefs[$group] as $k => $v ) {
740 $ent[] = $this->referencesFormatEntry( $k, $v );
743 // Add new lines between the list items (ref entries) to avoid confusing tidy (bug 13073).
744 // Note: This builds a string of wikitext, not html.
745 $parserInput = Html::rawElement( 'ol', [ 'class' => [ 'references' ] ],
746 "\n" . implode( "\n", $ent ) . "\n"
749 // Let's try to cache it.
750 global $wgCiteCacheReferences, $wgMemc;
752 if ( $wgCiteCacheReferences ) {
753 $cacheKey = wfMemcKey(
756 $this->mParser->Title()->getArticleID()
758 $data = $wgMemc->get( $cacheKey );
761 if ( !$data || !$this->mParser->isValidHalfParsedText( $data ) ) {
762 // Live hack: parse() adds two newlines on WM, can't reproduce it locally -ævar
763 $ret = rtrim( $this->mParser->recursiveTagParse( $parserInput ), "\n" );
765 if ( $wgCiteCacheReferences ) {
766 $serData = $this->mParser->serializeHalfParsedText( $ret );
767 $wgMemc->set( $cacheKey, $serData, 86400 );
771 $ret = $this->mParser->unserializeHalfParsedText( $data );
775 // Use a DIV wrap because column-count on a list directly is broken in Chrome.
776 // See https://bugs.chromium.org/p/chromium/issues/detail?id=498730.
777 $wrapClasses = [ 'mw-references-wrap' ];
778 if ( count( $this->mRefs[$group] ) > 10 ) {
779 $wrapClasses[] = 'mw-references-columns';
781 $ret = Html::rawElement( 'div', [ 'class' => $wrapClasses ], $ret );
784 if ( !$this->mParser->getOptions()->getIsPreview() ) {
785 // save references data for later use by LinksUpdate hooks
786 $this->saveReferencesData( $group );
789 // done, clean up so we can reuse the group
790 unset( $this->mRefs[$group] );
791 unset( $this->mGroupCnt[$group] );
797 * Format a single entry for the referencesFormat() function
799 * @param string $key The key of the reference
800 * @param mixed $val The value of the reference, string for anonymous
801 * references, array for user-supplied
802 * @return string Wikitext
804 private function referencesFormatEntry( $key, $val ) {
805 // Anonymous reference
806 if ( !is_array( $val ) ) {
808 'cite_references_link_one',
809 self::getReferencesKey( $key ),
810 $this->refKey( $key ),
811 $this->referenceText( $key, $val )
812 )->inContentLanguage()->plain();
814 $text = $this->referenceText( $key, $val['text'] );
815 if ( isset( $val['follow'] ) ) {
817 'cite_references_no_link',
818 self::getReferencesKey( $val['follow'] ),
820 )->inContentLanguage()->plain();
822 if ( !isset( $val['count'] ) ) {
823 // this handles the case of section preview for list-defined references
824 return wfMessage( 'cite_references_link_many',
825 self::getReferencesKey( $key . "-" . ( isset( $val['key'] ) ? $val['key'] : '' ) ),
828 )->inContentLanguage()->plain();
830 if ( $val['count'] < 0 ) {
832 'cite_references_link_one',
833 self::getReferencesKey( $val['key'] ),
834 # $this->refKey( $val['key'], $val['count'] ),
835 $this->refKey( $val['key'] ),
837 )->inContentLanguage()->plain();
838 // Standalone named reference, I want to format this like an
839 // anonymous reference because displaying "1. 1.1 Ref text" is
840 // overkill and users frequently use named references when they
841 // don't need them for convenience
843 if ( $val['count'] === 0 ) {
845 'cite_references_link_one',
846 self::getReferencesKey( $key . "-" . $val['key'] ),
847 # $this->refKey( $key, $val['count'] ),
848 $this->refKey( $key, $val['key'] . "-" . $val['count'] ),
850 )->inContentLanguage()->plain();
851 // Named references with >1 occurrences
854 // for group handling, we have an extra key here.
855 for ( $i = 0; $i <= $val['count']; ++$i ) {
856 $links[] = wfMessage(
857 'cite_references_link_many_format',
858 $this->refKey( $key, $val['key'] . "-$i" ),
859 $this->referencesFormatEntryNumericBacklinkLabel( $val['number'], $i, $val['count'] ),
860 $this->referencesFormatEntryAlternateBacklinkLabel( $i )
861 )->inContentLanguage()->plain();
864 $list = $this->listToText( $links );
866 return wfMessage( 'cite_references_link_many',
867 self::getReferencesKey( $key . "-" . $val['key'] ),
870 )->inContentLanguage()->plain();
/**
 * Wrap the text of a reference for display in the references list.
 *
 * A reference without text (null or the empty string) yields a message
 * instead: a warning while previewing a single section, an error otherwise.
 *
 * @param string $key Key of the reference, handed to the warning/error message
 * @param string|null $text Content of the reference
 * @return string
 */
private function referenceText( $key, $text ) {
	$hasText = $text !== null && $text !== '';
	if ( $hasText ) {
		// Trailing newlines are stripped before the closing tag is appended.
		return '<span class="reference-text">' . rtrim( $text, "\n" ) . "</span>\n";
	}

	$inSectionPreview = $this->mParser->getOptions()->getIsSectionPreview();

	return $inSectionPreview
		? $this->warning( 'cite_warning_sectionpreview_no_text', $key, 'noparse' )
		: $this->error( 'cite_error_references_no_text', $key, 'noparse' );
}
/**
 * Build a numeric backlink label of the form "<base>.<offset>",
 * e.g. $base = 1, $offset = 2 gives 1.2.
 *
 * The offset is zero-padded to the number of digits of $max, so that
 * (since bug #5525) 1.9 is followed by 1.10 and 1.099 by 1.100.
 * The result is passed through the content language's formatNum().
 *
 * @param int $base The base
 * @param int $offset The offset
 * @param int $max Maximum value expected.
 * @return string
 */
private function referencesFormatEntryNumericBacklinkLabel( $base, $offset, $max ) {
	global $wgContLang;

	// Number of digits the largest offset needs; used as the padding width.
	$width = strlen( $max );
	$label = sprintf( "%s.%0{$width}s", $base, $offset );

	return $wgContLang->formatNum( $label );
}
/**
 * Look up the custom backlink label for an offset, e.g. $offset = 2
 * gives 'c' if $this->mBacklinkLabels = [ 'a', 'b', 'c', ... ].
 *
 * The label list is generated on first use. An error message is
 * returned when no label exists for the requested offset.
 *
 * @param int $offset The offset
 * @return string
 */
private function referencesFormatEntryAlternateBacklinkLabel( $offset ) {
	// Labels are loaded lazily the first time one is requested.
	if ( $this->mBacklinkLabels === null ) {
		$this->genBacklinkLabels();
	}

	return isset( $this->mBacklinkLabels[$offset] )
		? $this->mBacklinkLabels[$offset]
		// Ran out of labels for this many backlinks.
		: $this->error( 'cite_error_references_no_backlink_label', null, 'noparse' );
}
/**
 * Pick the link label for a reference in a group, e.g. the second
 * <ref group="foo"> is 'b' if $this->mLinkLabels["foo"] = [ 'a', 'b', 'c', ... ].
 *
 * The labels of a group are generated on first use from the message
 * "cite_link_label_group-$group". When the group has no custom labels,
 * $label is returned unchanged; when custom labels exist but none is
 * available at this offset, an error message is returned.
 *
 * @param int $offset The offset (1-based: offset 1 maps to the first label)
 * @param string $group The group name
 * @param string $label The text to use if there's no message for them.
 * @return string
 */
private function getLinkLabel( $offset, $group, $label ) {
	$message = "cite_link_label_group-$group";

	if ( !isset( $this->mLinkLabels[$group] ) ) {
		$this->genLinkLabels( $group, $message );
	}

	$labels = $this->mLinkLabels[$group];
	if ( $labels === false ) {
		// No custom labels: use normal representation, ie. "$group 1", "$group 2"...
		return $label;
	}

	$index = $offset - 1;
	if ( !isset( $labels[$index] ) ) {
		// More references in the group than labels defined for it.
		return $this->error( 'cite_error_no_link_label_group', [ $group, $message ], 'noparse' );
	}

	return $labels[$index];
}
/**
 * Build an id for use in wikitext output from a key and, optionally,
 * a number; used in <references>, not <ref> (since otherwise it would
 * link to itself).
 *
 * When $num is given, key and number are first combined through the
 * 'cite_reference_link_key_with_num' message. The result is wrapped in
 * the 'cite_reference_link_prefix' / 'cite_reference_link_suffix' messages.
 *
 * @param string $key The key
 * @param int|string|null $num The number of the key (callers also pass
 *   composite strings such as "3-0")
 * @return string A key for use in wikitext
 */
private function refKey( $key, $num = null ) {
	$prefix = wfMessage( 'cite_reference_link_prefix' )->inContentLanguage()->text();
	$suffix = wfMessage( 'cite_reference_link_suffix' )->inContentLanguage()->text();

	$id = $key;
	if ( $num !== null ) {
		$combined = wfMessage( 'cite_reference_link_key_with_num', $key, $num );
		$id = $combined->inContentLanguage()->plain();
	}

	return $prefix . $id . $suffix;
}
/**
 * Build an id for use in wikitext output from a key, used in <ref>,
 * not <references> (since otherwise it would link to itself).
 *
 * The key is wrapped in the 'cite_references_link_prefix' /
 * 'cite_references_link_suffix' messages of the content language.
 *
 * @param string $key The key
 * @return string A key for use in wikitext
 */
public static function getReferencesKey( $key ) {
	return wfMessage( 'cite_references_link_prefix' )->inContentLanguage()->text()
		. $key
		. wfMessage( 'cite_references_link_suffix' )->inContentLanguage()->text();
}
1002 * Generate a link (<sup ...) for the <ref> element from a key
1003 * and return XHTML ready for output
1005 * @param string $group
1006 * @param string $key The key for the link
1007 * @param int $count The index of the key, used for distinguishing
1008 * multiple occurrences of the same key
1009 * @param int $label The label to use for the link, I want to
1010 * use the same label for all occurrences of
1011 * the same named reference.
1012 * @param string $subkey
1016 private function linkRef( $group, $key, $count = null, $label = null, $subkey = '' ) {
1018 $label = is_null( $label ) ? ++$this->mGroupCnt[$group] : $label;
1021 $this->mParser->recursiveTagParse(
1023 'cite_reference_link',
1024 $this->refKey( $key, $count ),
1025 self::getReferencesKey( $key . $subkey ),
1026 $this->getLinkLabel( $label, $group,
1027 ( ( $group === self::DEFAULT_GROUP ) ? '' : "$group " ) . $wgContLang->formatNum( $label ) )
1028 )->inContentLanguage()->plain()
/**
 * Join a list of items into one string.
 *
 * This does approximately the same thing as Language::listToText(),
 * but because it serves a slightly different purpose (people might not
 * want ',' as the first separator and 'and' as the second, and this has
 * to use messages from the content language) it is implemented here.
 *
 * All items but the last are joined with 'cite_references_link_many_sep';
 * the last one is attached with 'cite_references_link_many_and'.
 *
 * @param array $arr The array to format
 * @return string
 */
private function listToText( $arr ) {
	$total = count( $arr );

	$separator = wfMessage( 'cite_references_link_many_sep' )->inContentLanguage()->plain();
	$conjunction = wfMessage( 'cite_references_link_many_and' )->inContentLanguage()->plain();

	if ( $total !== 1 ) {
		$leading = array_slice( $arr, 0, $total - 1 );

		return implode( $separator, $leading ) . $conjunction . $arr[$total - 1];
	}

	// A single item needs no separators; enforce always returning a string.
	return (string)$arr[0];
}
/**
 * Generate the labels to pass to the
 * 'cite_references_link_many_format' message. The source message
 * 'cite_references_link_many_format_backlink_labels' holds an
 * arbitrary number of tokens separated by [\t\n ].
 *
 * The result is stored in $this->mBacklinkLabels as a 0-based list.
 */
private function genBacklinkLabels() {
	$text = wfMessage( 'cite_references_link_many_format_backlink_labels' )
		->inContentLanguage()->plain();

	// Split on runs of separators and drop empty tokens: splitting on a
	// single separator character turned "a  b" or a trailing newline into
	// empty labels, which shifted the offsets of the real labels and were
	// rendered as blank backlinks instead of raising the "no label" error.
	$labels = preg_split( '#[\n\t ]+#', $text, -1, PREG_SPLIT_NO_EMPTY );

	// preg_split() returns false on failure; keep the property an array so
	// the lazy-initialisation check in the caller does not re-run forever.
	$this->mBacklinkLabels = ( $labels === false ) ? [] : $labels;
}
/**
 * Generate the labels to pass to the
 * 'cite_reference_link' message instead of numbers. The source message
 * holds an arbitrary number of tokens separated by [\t\n ].
 *
 * Stores the label list in $this->mLinkLabels[$group], or false when the
 * message does not exist or contains no label at all.
 *
 * @param string $group
 * @param string $message Name of the message holding the labels
 */
private function genLinkLabels( $group, $message ) {
	$labels = false;

	$msg = wfMessage( $message )->inContentLanguage();
	if ( $msg->exists() ) {
		// Split on runs of separators and drop empty tokens, so that double
		// spaces or a trailing newline do not create blank labels that shift
		// the offsets of the real ones.
		$tokens = preg_split( '#[\n\t ]+#', $msg->plain(), -1, PREG_SPLIT_NO_EMPTY );

		// Decide on the token list rather than on the raw text: a message
		// consisting of the single label "0" is a valid label list, while a
		// blank or whitespace-only message means "no custom labels".
		if ( $tokens ) {
			$labels = $tokens;
		}
	}

	$this->mLinkLabels[$group] = $labels;
}
1088 * Gets run when Parser::clearState() gets run, since we don't
1089 * want the counts to transcend pages and other instances
1091 * @param Parser $parser
1095 public function clearState( Parser &$parser ) {
1096 if ( $parser->extCite !== $this ) {
1097 return $parser->extCite->clearState( $parser );
1100 # Don't clear state when we're in the middle of parsing
1102 if ( $this->mInCite || $this->mInReferences ) {
1106 $this->mGroupCnt = [];
1108 $this->mCallCnt = 0;
1110 $this->mReferencesErrors = [];
1111 $this->mRefCallStack = [];
1117 * Gets run when the parser is cloned.
1119 * @param Parser $parser
1123 public function cloneState( Parser $parser ) {
1124 if ( $parser->extCite !== $this ) {
1125 return $parser->extCite->cloneState( $parser );
1128 $parser->extCite = clone $this;
1129 $parser->setHook( 'ref', [ $parser->extCite, 'ref' ] );
1130 $parser->setHook( 'references', [ $parser->extCite, 'references' ] );
1132 // Clear the state, making sure it will actually work.
1133 $parser->extCite->mInCite = false;
1134 $parser->extCite->mInReferences = false;
1135 $parser->extCite->clearState( $parser );
1141 * Called at the end of page processing to append a default references
1142 * section, if refs were used without a main references tag. If there are references
1143 * in a custom group, and there is no references tag for it, show an error
1144 * message for that group.
1145 * If we are processing a section preview, this adds the missing
1146 * references tags and does not add the errors.
1148 * @param bool $afterParse True if called from the ParserAfterParse hook
1149 * @param Parser $parser
1150 * @param string $text
1154 public function checkRefsNoReferences( $afterParse, &$parser, &$text ) {
1155 global $wgCiteResponsiveReferences;
1156 if ( is_null( $parser->extCite ) ) {
1159 if ( $parser->extCite !== $this ) {
1160 return $parser->extCite->checkRefsNoReferences( $afterParse, $parser, $text );
1163 if ( $afterParse ) {
1164 $this->mHaveAfterParse = true;
1165 } elseif ( $this->mHaveAfterParse ) {
1169 if ( !$parser->getOptions()->getIsPreview() ) {
1170 // save references data for later use by LinksUpdate hooks
1171 if ( $this->mRefs && isset( $this->mRefs[self::DEFAULT_GROUP] ) ) {
1172 $this->saveReferencesData();
1174 $isSectionPreview = false;
1176 $isSectionPreview = $parser->getOptions()->getIsSectionPreview();
1180 foreach ( $this->mRefs as $group => $refs ) {
1184 if ( $group === self::DEFAULT_GROUP || $isSectionPreview ) {
1185 $s .= $this->referencesFormat( $group, $wgCiteResponsiveReferences );
1188 $this->error( 'cite_error_group_refs_without_references', htmlspecialchars( $group ) );
1191 if ( $isSectionPreview && $s !== '' ) {
1192 // provide a preview of references in its own section
1193 $text .= "\n" . '<div class="mw-ext-cite-cite_section_preview_references" >';
1194 $headerMsg = wfMessage( 'cite_section_preview_references' );
1195 if ( !$headerMsg->isDisabled() ) {
1196 $text .= '<h2 id="mw-ext-cite-cite_section_preview_references_header" >'
1197 . $headerMsg->escaped()
1200 $text .= $s . '</div>';
1208 * Saves references in parser extension data
1209 * This is called by each <references/> tag, and by checkRefsNoReferences
1210 * Assumes $this->mRefs[$group] is set
1214 private function saveReferencesData( $group = self::DEFAULT_GROUP ) {
// Copies the refs of $group from $this->mRefs into the parser output extension
// data kept under EXT_DATA_KEY.
1215 global $wgCiteStoreReferencesData;
// Guard on the configuration switch $wgCiteStoreReferencesData.
1216 if ( !$wgCiteStoreReferencesData ) {
// Fetch the structure stored so far under EXT_DATA_KEY; null means nothing
// has been stored yet and the structure must be created.
1219 $savedRefs = $this->mParser->getOutput()->getExtensionData( self::EXT_DATA_KEY );
1220 if ( $savedRefs === null ) {
1221 // Initialize array structure
1224 'version' => self::DATA_VERSION_NUMBER,
1227 if ( $this->mBumpRefData ) {
1228 // This handles pages with multiple <references/> tags with <ref> tags in between.
1229 // On those, a group can appear several times, so we need to avoid overwriting
1230 // a previous appearance.
1231 $savedRefs['refs'][] = [];
1232 $this->mBumpRefData = false;
// $n is the index of the last entry in the refs list.
1234 $n = count( $savedRefs['refs'] ) - 1;
// Store this group inside that last entry, replacing any value already there.
1236 $savedRefs['refs'][$n][$group] = $this->mRefs[$group];
// Write the updated structure back to the parser output.
1238 $this->mParser->getOutput()->setExtensionData( self::EXT_DATA_KEY, $savedRefs );
1242 * Hook for the InlineEditor extension.
1243 * If any <ref> or <references> tag is in the text,
1244 * the entire page should be reparsed, so we return false in that case.
1250 public function checkAnyCalls( &$output ) {
// The $output argument is not used by the lines visible here.
1252 /* InlineEditor always uses $wgParser */
// Returns true when the call counter of the Cite object attached to $wgParser
// is zero or less, and false otherwise.
// NOTE(review): mCallCnt presumably counts ref/references tag calls - confirm
// where it is incremented.
1253 return ( $wgParser->extCite->mCallCnt <= 0 );
1257 * Initialize the parser hooks
1259 * @param Parser $parser
1263 public static function setHooks( Parser $parser ) {
// Attach a fresh Cite instance to the given parser.
1266 $parser->extCite = new self();
// The $wgHooks handlers are registered only once, guarded by the static
// $hooksInstalled flag, and are bound to the instance created on that first call.
1268 if ( !self::$hooksInstalled ) {
1269 $wgHooks['ParserClearState'][] = [ $parser->extCite, 'clearState' ];
1270 $wgHooks['ParserCloned'][] = [ $parser->extCite, 'cloneState' ];
// The same method serves two hooks; the extra true/false array element tells
// them apart. NOTE(review): presumably delivered to the handler as its first
// argument $afterParse - confirm against the hook runner.
1271 $wgHooks['ParserAfterParse'][] = [ $parser->extCite, 'checkRefsNoReferences', true ];
1272 $wgHooks['ParserBeforeTidy'][] = [ $parser->extCite, 'checkRefsNoReferences', false ];
1273 $wgHooks['InlineEditorPartialAfterParse'][] = [ $parser->extCite, 'checkAnyCalls' ];
1274 self::$hooksInstalled = true;
// Unlike the global hooks, the ref and references tag hooks are set on the
// given parser on every call, bound to the instance created above.
1276 $parser->setHook( 'ref', [ $parser->extCite, 'ref' ] );
1277 $parser->setHook( 'references', [ $parser->extCite, 'references' ] );
1283 * Return an error message based on an error ID
1285 * @param string $key Message name for the error
1286 * @param string|null $param Parameter to pass to the message
1287 * @param string $parse Whether to parse the message ('parse') or not ('noparse')
1288 * @return string XHTML or wikitext ready for output
1290 private function error( $key, $param = null, $parse = 'parse' ) {
// Builds the error markup for message $key in the user language and adds the
// Cite error tracking category through the parser.
1291 # For ease of debugging and because errors are rare, we
1292 # use the user language and split the parser cache.
1293 $lang = $this->mParser->getOptions()->getUserLangObj();
// Text direction reported by that language object.
1294 $dir = $lang->getDir();
1296 # We rely on the fact that PHP is okay with passing unused argu-
1297 # ments to functions. If $1 is not used in the message, wfMessage will
1298 # just ignore the extra parameter.
1301 wfMessage( $key, $param )->inLanguage( $lang )->plain()
1303 ->inLanguage( $lang )
// Each call adds the Cite error tracking category via the parser.
1306 $this->mParser->addTrackingCategory( 'cite-tracking-category-cite-error' );
// Build the element carrying the error classes and the language code.
1308 $ret = Html::rawElement(
1311 'class' => 'error mw-ext-cite-error',
1312 'lang' => $lang->getHtmlCode(),
// Only the exact mode string parse triggers recursiveTagParse; this step is
// skipped for any other value of $parse.
1318 if ( $parse === 'parse' ) {
1319 $ret = $this->mParser->recursiveTagParse( $ret );
1326 * Return a warning message based on a warning ID
1328 * @param string $key Message name for the warning. Name should start with cite_warning_
1329 * @param string|null $param Parameter to pass to the message
1330 * @param string $parse Whether to parse the message ('parse') or not ('noparse')
1331 * @return string XHTML or wikitext ready for output
1333 private function warning( $key, $param = null, $parse = 'parse' ) {
// Builds the warning markup for message $key in the user language. No
// addTrackingCategory call is visible here, unlike in error().
1334 # For ease of debugging and because errors are rare, we
1335 # use the user language and split the parser cache.
1336 $lang = $this->mParser->getOptions()->getUserLangObj();
// Text direction reported by that language object.
1337 $dir = $lang->getDir();
1339 # We rely on the fact that PHP is okay with passing unused argu-
1340 # ments to functions. If $1 is not used in the message, wfMessage will
1341 # just ignore the extra parameter.
1344 wfMessage( $key, $param )->inLanguage( $lang )->plain()
1346 ->inLanguage( $lang )
// Strip the leading cite_warning_ prefix so the CSS class suffix below is the
// bare warning name. NOTE(review): the trailing empty-string concatenation
// presumably forces a string, since preg_replace can return null on error.
1349 $key = preg_replace( '/^cite_warning_/', '', $key ) . '';
// Build the element carrying the generic warning classes, a class specific to
// this warning (escaped with Sanitizer::escapeClass) and the language code.
1350 $ret = Html::rawElement(
1353 'class' => 'warning mw-ext-cite-warning mw-ext-cite-warning-' .
1354 Sanitizer::escapeClass( $key ),
1355 'lang' => $lang->getHtmlCode(),
// Only the exact mode string parse triggers recursiveTagParse; this step is
// skipped for any other value of $parse.
1361 if ( $parse === 'parse' ) {
1362 $ret = $this->mParser->recursiveTagParse( $ret );
1369 * Fetch references stored for the given title in page_props
1370 * For performance, results are cached
1372 * @param Title $title
1373 * @return array|false
1375 public static function getStoredReferences( Title $title ) {
// Reads the stored references of $title through the main WAN object cache.
// The callback passed to getWithSetCallback loads the value from the database.
1376 global $wgCiteStoreReferencesData;
// Guard on the configuration switch $wgCiteStoreReferencesData.
1377 if ( !$wgCiteStoreReferencesData ) {
1380 $cache = ObjectCache::getMainWANInstance();
// The cache key combines EXT_DATA_KEY with the article ID of the title.
1381 $key = $cache->makeKey( self::EXT_DATA_KEY, $title->getArticleID() );
1382 return $cache->getWithSetCallback(
1384 self::CACHE_DURATION_ONFETCH,
1385 function ( $oldValue, &$ttl, array &$setOpts ) use ( $title ) {
// Read from a replica connection and merge the cache set options derived
// from that connection into $setOpts before fetching the stored references.
1386 $dbr = wfGetDB( DB_REPLICA );
1387 $setOpts += Database::getCacheSetOptions( $dbr );
1388 return self::recursiveFetchRefsFromDB( $title, $dbr );
1391 'checkKeys' => [ $key ],
1398 * Reconstructs compressed json by successively retrieving the properties references-1, -2, etc
1399 * When gzdecode fails on the data collected so far, it fetches the next property.
1400 * Returns the decompressed string decoded with json_decode, after checking for json errors
1402 * @param Title $title
1403 * @param DatabaseBase $dbr
1404 * @param string $string
1406 * @return array|false
1408 private static function recursiveFetchRefsFromDB( Title $title, DatabaseBase $dbr,
1409 $string = '', $i = 1 ) {
1410 $id = $title->getArticleID();
1411 $result = $dbr->selectField(
1416 'pp_propname' => 'references-' . $i
1420 if ( $result !== false ) {
1422 $decodedString = gzdecode( $string );
1423 if ( $decodedString !== false ) {
1424 $json = json_decode( $decodedString, true );
1425 if ( json_last_error() === JSON_ERROR_NONE ) {
1429 // shouldn't happen since when string is truncated, gzdecode should fail
1430 wfDebug( "Corrupted json detected when retrieving stored references for title id $id" );
1432 // if gzdecode fails, try to fetch next references- property value
1433 return self::recursiveFetchRefsFromDB( $title, $dbr, $string, ++$i );
1436 // no refs stored in page_props at this index
1439 wfDebug( "Failed to retrieve stored references for title id $id" );