<?php
/**
* File for magic words
- * @package MediaWiki
- * @subpackage Parser
+ * @addtogroup Parser
*/
-/**
- * private
- */
-$wgMagicFound = false;
-
-/** Actual keyword to be used is set in Language.php */
-define('MAG_REDIRECT', 0);
-define('MAG_NOTOC', 1);
-define('MAG_START', 2);
-define('MAG_CURRENTMONTH', 3);
-define('MAG_CURRENTMONTHNAME', 4);
-define('MAG_CURRENTMONTHNAMEGEN', 5);
-define('MAG_CURRENTMONTHABBREV', 6);
-define('MAG_CURRENTDAY', 7);
-define('MAG_CURRENTDAYNAME', 8);
-define('MAG_CURRENTYEAR', 9);
-define('MAG_CURRENTTIME', 10);
-define('MAG_NUMBEROFARTICLES', 11);
-define('MAG_SUBST', 12);
-define('MAG_MSG', 13);
-define('MAG_MSGNW', 14);
-define('MAG_NOEDITSECTION', 15);
-define('MAG_END', 16);
-define('MAG_IMG_THUMBNAIL', 17);
-define('MAG_IMG_RIGHT', 18);
-define('MAG_IMG_LEFT', 19);
-define('MAG_IMG_NONE', 20);
-define('MAG_IMG_WIDTH', 21);
-define('MAG_IMG_CENTER', 22);
-define('MAG_INT', 23);
-define('MAG_FORCETOC', 24);
-define('MAG_SITENAME', 25);
-define('MAG_NS', 26);
-define('MAG_LOCALURL', 27);
-define('MAG_LOCALURLE', 28);
-define('MAG_SERVER', 29);
-define('MAG_IMG_FRAMED', 30);
-define('MAG_PAGENAME', 31);
-define('MAG_PAGENAMEE', 32);
-define('MAG_NAMESPACE', 33);
-define('MAG_TOC', 34);
-define('MAG_GRAMMAR', 35);
-define('MAG_NOTITLECONVERT', 36);
-define('MAG_NOCONTENTCONVERT', 37);
-define('MAG_CURRENTWEEK', 38);
-define('MAG_CURRENTDOW', 39);
-define('MAG_REVISIONID', 40);
-define('MAG_SCRIPTPATH', 41);
-define('MAG_SERVERNAME', 42);
-define('MAG_NUMBEROFFILES', 43);
-
-$wgVariableIDs = array(
- MAG_CURRENTMONTH,
- MAG_CURRENTMONTHNAME,
- MAG_CURRENTMONTHNAMEGEN,
- MAG_CURRENTMONTHABBREV,
- MAG_CURRENTDAY,
- MAG_CURRENTDAYNAME,
- MAG_CURRENTYEAR,
- MAG_CURRENTTIME,
- MAG_NUMBEROFARTICLES,
- MAG_NUMBEROFFILES,
- MAG_SITENAME,
- MAG_SERVER,
- MAG_SERVERNAME,
- MAG_SCRIPTPATH,
- MAG_PAGENAME,
- MAG_PAGENAMEE,
- MAG_NAMESPACE,
- MAG_CURRENTWEEK,
- MAG_CURRENTDOW,
- MAG_REVISIONID,
-);
-
/**
* This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
* Usage:
- * if (MagicWord::get( MAG_REDIRECT )->match( $text ) )
- *
- * Possible future improvements:
+ * if (MagicWord::get( 'redirect' )->match( $text ) )
+ *
+ * Possible future improvements:
* * Simultaneous searching for a number of magic words
- * * $wgMagicWords in shared memory
+ * * MagicWord::$mObjects in shared memory
*
- * Please avoid reading the data out of one of these objects and then writing
+ * Please avoid reading the data out of one of these objects and then writing
* special case code. If possible, add another match()-like function here.
*
- * @package MediaWiki
+ * To add magic words in an extension, use the LanguageGetMagic hook. For
+ * magic words which are also Parser variables, add a MagicWordwgVariableIDs
+ * hook. Use string keys.
+ *
*/
class MagicWord {
/**#@+
- * @access private
+ * @private
*/
var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
var $mRegexStart, $mBaseRegex, $mVariableRegex;
- var $mModified;
+ var $mModified, $mFound;
+
+ static public $mVariableIDsInitialised = false;
+ static public $mVariableIDs = array(
+ 'currentmonth',
+ 'currentmonthname',
+ 'currentmonthnamegen',
+ 'currentmonthabbrev',
+ 'currentday',
+ 'currentday2',
+ 'currentdayname',
+ 'currentyear',
+ 'currenttime',
+ 'currenthour',
+ 'localmonth',
+ 'localmonthname',
+ 'localmonthnamegen',
+ 'localmonthabbrev',
+ 'localday',
+ 'localday2',
+ 'localdayname',
+ 'localyear',
+ 'localtime',
+ 'localhour',
+ 'numberofarticles',
+ 'numberoffiles',
+ 'numberofedits',
+ 'sitename',
+ 'server',
+ 'servername',
+ 'scriptpath',
+ 'pagename',
+ 'pagenamee',
+ 'fullpagename',
+ 'fullpagenamee',
+ 'namespace',
+ 'namespacee',
+ 'currentweek',
+ 'currentdow',
+ 'localweek',
+ 'localdow',
+ 'revisionid',
+ 'revisionday',
+ 'revisionday2',
+ 'revisionmonth',
+ 'revisionyear',
+ 'revisiontimestamp',
+ 'subpagename',
+ 'subpagenamee',
+ 'displaytitle',
+ 'talkspace',
+ 'talkspacee',
+ 'subjectspace',
+ 'subjectspacee',
+ 'talkpagename',
+ 'talkpagenamee',
+ 'subjectpagename',
+ 'subjectpagenamee',
+ 'numberofusers',
+ 'newsectionlink',
+ 'numberofpages',
+ 'currentversion',
+ 'basepagename',
+ 'basepagenamee',
+ 'urlencode',
+ 'currenttimestamp',
+ 'localtimestamp',
+ 'directionmark',
+ 'language',
+ 'contentlanguage',
+ 'pagesinnamespace',
+ 'numberofadmins',
+ 'defaultsort',
+ );
+
+ static public $mObjects = array();
+
/**#@-*/
- function MagicWord($id = 0, $syn = '', $cs = false) {
+ function __construct($id = 0, $syn = '', $cs = false) {
$this->mId = $id;
$this->mSynonyms = (array)$syn;
$this->mCaseSensitive = $cs;
* Factory: creates an object representing an ID
* @static
*/
- function &get( $id ) {
- global $wgMagicWords;
-
- if ( !is_array( $wgMagicWords ) ) {
- wfDebugDieBacktrace( "Incorrect initialisation order, \$wgMagicWords does not exist\n" );
- }
- if (!array_key_exists( $id, $wgMagicWords ) ) {
+ static function &get( $id ) {
+ # Objects are cached per ID in self::$mObjects; one is created and
+ # loaded only the first time its ID is requested.
+ if (!array_key_exists( $id, self::$mObjects ) ) {
$mw = new MagicWord();
$mw->load( $id );
- $wgMagicWords[$id] = $mw;
+ self::$mObjects[$id] = $mw;
+ }
+ # Returned by reference (note the & in the signature)
+ return self::$mObjects[$id];
+ }
+
+ /**
+ * Get an array of parser variable IDs.
+ *
+ * On the first call this runs the deprecated 'MagicWordMagicWords' hook and
+ * defines a constant for every name it collects, then lets the
+ * 'MagicWordwgVariableIDs' hook modify self::$mVariableIDs. Later calls
+ * return the stored list without running the hooks again.
+ *
+ * @return array List of variable ID strings
+ */
+ static function getVariableIDs() {
+ if ( !self::$mVariableIDsInitialised ) {
+ # Deprecated constant definition hook, available for extensions that need it
+ $magicWords = array();
+ wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
+ foreach ( $magicWords as $word ) {
+ # define() complains when the name is already taken (for example when
+ # two hook handlers register the same word), so only define new names.
+ if ( !defined( $word ) ) {
+ define( $word, $word );
+ }
+ }
+
+ # Get variable IDs
+ wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
+ self::$mVariableIDsInitialised = true;
}
- return $wgMagicWords[$id];
+ return self::$mVariableIDs;
}
-
+
# Initialises this object with an ID
function load( $id ) {
- global $wgContLang;
+ global $wgContLang;
$this->mId = $id;
+ # Hand this object to the content language; presumably getMagic() fills
+ # in the synonyms for mId (its body is not visible here - TODO confirm).
$wgContLang->getMagic( $this );
+ if ( !$this->mSynonyms ) {
+ # No synonyms were supplied for this ID. Install a placeholder synonym
+ # and log the problem instead of throwing (the throw is commented out).
+ $this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
+ #throw new MWException( "Error: invalid magic word '$id'" );
+ wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
+ }
}
-
+
/**
* Preliminary initialisation
* @private
# This was used for matching "$1" variables, but different uses of the feature will have
# different restrictions, which should be checked *after* the MagicWord has been matched,
# not here. - IMSoP
- $escSyn = array_map( 'preg_quote', $this->mSynonyms );
+
+ $escSyn = array();
+ foreach ( $this->mSynonyms as $synonym )
+ // In case a magic word contains /, like that's going to happen;)
+ $escSyn[] = preg_quote( $synonym, '/' );
$this->mBaseRegex = implode( '|', $escSyn );
- $case = $this->mCaseSensitive ? '' : 'i';
+
+ $case = $this->mCaseSensitive ? '' : 'iu';
$this->mRegex = "/{$this->mBaseRegex}/{$case}";
$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
- $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
+ $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
"/^(?:{$this->mBaseRegex})$/{$case}" );
}
-
+
/**
* Gets a regex representing matching the word
*/
return $this->mRegex;
}
+ /**
+  * Returns the PCRE modifier string that goes with getBaseRegex(): 'iu'
+  * for a case-insensitive word, '' for a case-sensitive one. The complete
+  * expressions built by initRegex() already include it.
+  */
+ function getRegexCase() {
+ 	if ( $this->mRegex === '' ) {
+ 		$this->initRegex();
+ 	}
+ 	if ( $this->mCaseSensitive ) {
+ 		return '';
+ 	}
+ 	return 'iu';
+ }
+
/**
* Gets a regex matching the word, if it is at the string start
*/
}
return $this->mBaseRegex;
}
-
+
/**
* Returns true if the text contains the word
* @return bool
* is one.
*/
function matchVariableStartToEnd( $text ) {
+ $matches = array();
$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
if ( $matchcount == 0 ) {
return NULL;
- } elseif ( count($matches) == 1 ) {
- return $matches[0];
} else {
- # multiple matched parts (variable match); some will be empty because of synonyms
- # the variable will be the second non-empty one so remove any blank elements and re-sort the indices
+ # multiple matched parts (variable match); some will be empty because of
+ # synonyms. The variable will be the second non-empty one so remove any
+ # blank elements and re-sort the indices.
+ # See also bug 6526
+ #
+ # Filter on string length rather than truthiness: a bare array_filter()
+ # would also discard a captured "0", and the whole match would then be
+ # returned in place of the variable.
+
- $matches = array_values(array_filter($matches));
- return $matches[1];
+ $matches = array_values(array_filter($matches, 'strlen'));
+
+ # Only the whole match left: the word has no variable part
+ if ( count($matches) == 1 ) { return $matches[0]; }
+ else { return $matches[1]; }
}
}
* input string, removing all instances of the word
*/
function matchAndRemove( &$text ) {
- global $wgMagicFound;
- $wgMagicFound = false;
- $text = preg_replace_callback( $this->getRegex(), 'pregRemoveAndRecord', $text );
- return $wgMagicFound;
+ # pregRemoveAndRecord() sets mFound if the regex matches at least once and
+ # replaces every match with the empty string. $text is modified in place.
+ # The callback array holds $this directly; objects are passed by handle,
+ # so the PHP 4 style &$this is not needed.
+ $this->mFound = false;
+ $text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
+ return $this->mFound;
}
function matchStartAndRemove( &$text ) {
- global $wgMagicFound;
- $wgMagicFound = false;
- $text = preg_replace_callback( $this->getRegexStart(), 'pregRemoveAndRecord', $text );
- return $wgMagicFound;
- }
+ # Same as matchAndRemove(), but uses the regex from getRegexStart().
+ # Returns true if something was removed; $text is modified in place.
+ # The callback array holds $this directly; objects are passed by handle,
+ # so the PHP 4 style &$this is not needed.
+ $this->mFound = false;
+ $text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
+ return $this->mFound;
+ }
+ /**
+ * Callback for preg_replace_callback(), used in matchAndRemove() and
+ * matchStartAndRemove(). Records on this object that a match occurred
+ * and returns the empty string as the replacement.
+ * @private
+ **/
+ function pregRemoveAndRecord( ) {
+ $this->mFound = true;
+ return '';
+ }
/**
* Replaces the word with something else
+ *
+ * The replacement is passed through StringUtils::escapeRegexReplacement()
+ * before being handed to preg_replace(). Also records in mModified
+ * whether the result differs from $subject.
+ *
+ * @param $replacement Replacement text
+ * @param $subject Text to search
+ * @param $limit Passed to preg_replace() as its limit argument; the
+ * default of -1 means no limit
+ * @return The text after replacement
*/
- function replace( $replacement, $subject ) {
- $res = preg_replace( $this->getRegex(), $replacement, $subject );
+ function replace( $replacement, $subject, $limit=-1 ) {
+ $res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
$this->mModified = !($res === $subject);
return $res;
}
* Input word must contain $1
*/
function substituteCallback( $text, $callback ) {
- $regex = $this->getVariableRegex();
$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
$this->mModified = !($res === $text);
return $res;
function getVariableRegex() {
+ # Built on first use: initRegex() runs while the stored value still
+ # compares equal to '' (loose comparison).
if ( $this->mVariableRegex == '' ) {
$this->initRegex();
- }
+ }
return $this->mVariableRegex;
}
function getVariableStartToEndRegex() {
+ # Built on first use. NOTE(review): mVariableStartToEndRegex is not in the
+ # visible "var" declarations of this class; the loose == '' test would
+ # also be true for an unset (null) property - confirm that is intended.
if ( $this->mVariableStartToEndRegex == '' ) {
$this->initRegex();
- }
+ }
return $this->mVariableStartToEndRegex;
}
return $this->mSynonyms[$i];
}
+ /**
+ * Returns the array of synonyms held by this magic word
+ */
+ function getSynonyms() {
+ return $this->mSynonyms;
+ }
+
/**
- * Returns true if the last call to replace() or substituteCallback()
+ * Returns true if the last call to replace() or substituteCallback()
* returned a modified text, otherwise false.
*/
function getWasModified(){
* This method uses the php feature to do several replacements at the same time,
* thereby gaining some efficiency. The result is placed in the out variable
* $result. The return value is true if something was replaced.
- * @static
+ * @static
**/
function replaceMultiple( $magicarr, $subject, &$result ){
$search = array();
* lookup in a list of magic words
*/
function addToArray( &$array, $value ) {
+ global $wgContLang;
+ # Each synonym becomes a key of $array, lower-cased with the content
+ # language. NOTE(review): this is done regardless of mCaseSensitive.
foreach ( $this->mSynonyms as $syn ) {
- $array[$syn] = $value;
+ $array[$wgContLang->lc($syn)] = $value;
}
}
+
+ /**
+ * Returns the case-sensitivity flag (mCaseSensitive) of this magic word
+ */
+ function isCaseSensitive() {
+ return $this->mCaseSensitive;
+ }
+
+ /**
+ * Returns the ID this object was constructed or loaded with
+ */
+ function getId() {
+ return $this->mId;
+ }
}
/**
- * Used in matchAndRemove()
- * @private
- **/
-function pregRemoveAndRecord( $match ) {
- global $wgMagicFound;
- $wgMagicFound = true;
- return '';
-}
+ * Class for handling an array of magic words
+ */
+class MagicWordArray {
+	/** Names (IDs) of the magic words held in this array */
+	var $names = array();
+	/** Cached result of getHash(), null until built */
+	var $hash;
+	/** Cached results of getBaseRegex() and getRegex(), null until built */
+	var $baseRegex, $regex;
+
+	/**
+	 * @param $names Array of magic word names to start with
+	 */
+	function __construct( $names = array() ) {
+		$this->names = $names;
+	}
+
+	/**
+	 * Add a magic word by name
+	 */
+	public function add( $name ) {
+		$this->names[] = $name;
+		# Everything derived from the old name list is now stale
+		$this->hash = $this->baseRegex = $this->regex = null;
+	}
+
+	/**
+	 * Add a number of magic words by name
+	 */
+	public function addArray( $names ) {
+		$this->names = array_merge( $this->names, array_values( $names ) );
+		# Everything derived from the old name list is now stale
+		$this->hash = $this->baseRegex = $this->regex = null;
+	}
+
+	/**
+	 * Get a 2-d hashtable for this array.
+	 *
+	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
+	 * magic word name; index 1 maps the unchanged synonyms of case-sensitive
+	 * words to their name.
+	 */
+	function getHash() {
+		if ( is_null( $this->hash ) ) {
+			global $wgContLang;
+			$this->hash = array( 0 => array(), 1 => array() );
+			foreach ( $this->names as $name ) {
+				$magic = MagicWord::get( $name );
+				$case = intval( $magic->isCaseSensitive() );
+				foreach ( $magic->getSynonyms() as $syn ) {
+					if ( !$case ) {
+						$syn = $wgContLang->lc( $syn );
+					}
+					$this->hash[$case][$syn] = $name;
+				}
+			}
+		}
+		return $this->hash;
+	}
+
+	/**
+	 * Get the base regex.
+	 *
+	 * Returns a two-element array of delimiter-less alternations: index 0 for
+	 * case-insensitive words, index 1 for case-sensitive ones. Every synonym
+	 * is wrapped in a named group "<index>_<name>" so that parseMatch() can
+	 * recover the magic word name from a match.
+	 */
+	function getBaseRegex() {
+		if ( is_null( $this->baseRegex ) ) {
+			$this->baseRegex = array( 0 => '', 1 => '' );
+			foreach ( $this->names as $name ) {
+				$magic = MagicWord::get( $name );
+				$case = intval( $magic->isCaseSensitive() );
+				foreach ( $magic->getSynonyms() as $i => $syn ) {
+					# PCRE (8.34 and later) rejects group names that begin with
+					# a digit, so spell the synonym index with letters.
+					# parseMatch() only uses the part after the first underscore.
+					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
+					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
+					if ( $this->baseRegex[$case] === '' ) {
+						$this->baseRegex[$case] = $group;
+					} else {
+						$this->baseRegex[$case] .= '|' . $group;
+					}
+				}
+			}
+		}
+		return $this->baseRegex;
+	}
+
+	/**
+	 * Get an unanchored regex.
+	 *
+	 * Returns a two-element array laid out like getBaseRegex(); an element is
+	 * '' when there are no words of that kind.
+	 */
+	function getRegex() {
+		if ( is_null( $this->regex ) ) {
+			$base = $this->getBaseRegex();
+			$this->regex = array( '', '' );
+			if ( $base[0] !== '' ) {
+				$this->regex[0] = "/{$base[0]}/iuS";
+			}
+			if ( $base[1] !== '' ) {
+				$this->regex[1] = "/{$base[1]}/S";
+			}
+		}
+		return $this->regex;
+	}
+
+	/**
+	 * Get a regex for matching variables: getRegex() with every quoted "$1"
+	 * replaced by a non-greedy capture group
+	 */
+	function getVariableRegex() {
+		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
+	}
+
+	/**
+	 * Get an anchored regex for matching variables.
+	 *
+	 * Returns a two-element array laid out like getBaseRegex(); an element is
+	 * '' when there are no words of that kind.
+	 */
+	function getVariableStartToEndRegex() {
+		$base = $this->getBaseRegex();
+		$newRegex = array( '', '' );
+		if ( $base[0] !== '' ) {
+			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
+		}
+		if ( $base[1] !== '' ) {
+			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
+		}
+		return $newRegex;
+	}
+
+	/**
+	 * Parse a match array from preg_match
+	 *
+	 * @param $m Match array produced with one of this object's regexes
+	 * @return array( magic word name, parameter value ); the parameter is
+	 *   false when the match array ends right after the matching group
+	 * @throws MWException if no usable named group is found
+	 */
+	function parseMatch( $m ) {
+		# Walk the array by position instead of with each(), which no longer
+		# exists in PHP 8.
+		$keys = array_keys( $m );
+		$values = array_values( $m );
+		$count = count( $keys );
+		for ( $pos = 0; $pos < $count; $pos++ ) {
+			$key = $keys[$pos];
+			if ( $key === 0 || $values[$pos] === '' ) {
+				continue;
+			}
+			$parts = explode( '_', (string)$key, 2 );
+			if ( count( $parts ) != 2 ) {
+				// This shouldn't happen
+				throw new MWException( __METHOD__ . ': bad parameter name' );
+			}
+			$magicName = $parts[1];
+			# preg_match stores a named group twice, under its name and under
+			# its number, so the entry after that pair is two positions on.
+			$paramValue = isset( $values[$pos + 2] ) ? $values[$pos + 2] : false;
+			return array( $magicName, $paramValue );
+		}
+		// This shouldn't happen either
+		throw new MWException( __METHOD__.': parameter not found' );
+	}
+
+	/**
+	 * Match some text, with parameter capture
+	 * Returns an array with the magic word name in the first element and the
+	 * parameter in the second element.
+	 * Both elements are false if there was no match.
+	 */
+	public function matchVariableStartToEnd( $text ) {
+		$regexes = $this->getVariableStartToEndRegex();
+		foreach ( $regexes as $regex ) {
+			if ( $regex !== '' ) {
+				$m = array();
+				if ( preg_match( $regex, $text, $m ) ) {
+					return $this->parseMatch( $m );
+				}
+			}
+		}
+		return array( false, false );
+	}
+
+	/**
+	 * Match some text, without parameter capture
+	 * Returns the magic word name, or false if there was no capture
+	 */
+	public function matchStartToEnd( $text ) {
+		global $wgContLang;
+		$hash = $this->getHash();
+		# Case-sensitive words are tried first, on the text exactly as given
+		if ( isset( $hash[1][$text] ) ) {
+			return $hash[1][$text];
+		}
+		$lc = $wgContLang->lc( $text );
+		if ( isset( $hash[0][$lc] ) ) {
+			return $hash[0][$lc];
+		}
+		return false;
+	}
+}