X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/mediawiki.git/blobdiff_plain/a4b52d2fe555a507c376e78ee624898c55968364..d7967d5e4460e08b6b258307afbca0596b18a3dd:/includes/MagicWord.php?ds=sidebyside diff --git a/includes/MagicWord.php b/includes/MagicWord.php index 0fbe370b..f7a9400d 100644 --- a/includes/MagicWord.php +++ b/includes/MagicWord.php @@ -1,108 +1,112 @@ match( $text ) ) - * - * Possible future improvements: + * if (MagicWord::get( 'redirect' )->match( $text ) ) + * + * Possible future improvements: * * Simultaneous searching for a number of magic words - * * $wgMagicWords in shared memory + * * MagicWord::$mObjects in shared memory * - * Please avoid reading the data out of one of these objects and then writing + * Please avoid reading the data out of one of these objects and then writing * special case code. If possible, add another match()-like function here. * - * @package MediaWiki + * To add magic words in an extension, use the LanguageGetMagic hook. For + * magic words which are also Parser variables, add a MagicWordwgVariableIDs + * hook. Use string keys. + * */ class MagicWord { /**#@+ - * @access private + * @private */ var $mId, $mSynonyms, $mCaseSensitive, $mRegex; var $mRegexStart, $mBaseRegex, $mVariableRegex; - var $mModified; + var $mModified, $mFound; + + static public $mVariableIDsInitialised = false; + static public $mVariableIDs = array( + 'currentmonth', + 'currentmonthname', + 'currentmonthnamegen', + 'currentmonthabbrev', + 'currentday', + 'currentday2', + 'currentdayname', + 'currentyear', + 'currenttime', + 'currenthour', + 'localmonth', + 'localmonthname', + 'localmonthnamegen', + 'localmonthabbrev', + 'localday', + 'localday2', + 'localdayname', + 'localyear', + 'localtime', + 'localhour', + 'numberofarticles', + 'numberoffiles', + 'numberofedits', + 'sitename', + 'server', + 'servername', + 'scriptpath', + 'pagename', + 'pagenamee', + 'fullpagename', + 'fullpagenamee', + 'namespace', + 'namespacee', + 'currentweek', + 'currentdow', + 'localweek', + 'localdow', + 'revisionid', + 'revisionday', + 'revisionday2', + 'revisionmonth', + 'revisionyear', + 'revisiontimestamp', + 'subpagename', + 'subpagenamee', + 'displaytitle', + 'talkspace', + 'talkspacee', + 'subjectspace', + 'subjectspacee', + 'talkpagename', + 'talkpagenamee', + 'subjectpagename', + 'subjectpagenamee', + 'numberofusers', + 'newsectionlink', + 'numberofpages', + 'currentversion', + 'basepagename', + 'basepagenamee', + 'urlencode', + 'currenttimestamp', + 'localtimestamp', + 'directionmark', + 'language', + 'contentlanguage', + 'pagesinnamespace', + 'numberofadmins', + 'defaultsort', + ); + + static public $mObjects = array(); + /**#@-*/ - function MagicWord($id = 0, $syn = '', $cs = false) { + function __construct($id = 0, $syn = '', $cs = false) { $this->mId = $id; $this->mSynonyms = (array)$syn; $this->mCaseSensitive = $cs; @@ -117,27 +121,46 @@ class MagicWord { * Factory: creates an object representing an ID * @static */ - function &get( $id ) { - global $wgMagicWords; - - if ( !is_array( $wgMagicWords ) ) { - wfDebugDieBacktrace( "Incorrect initialisation order, \$wgMagicWords does not exist\n" ); - } - if (!array_key_exists( $id, $wgMagicWords ) ) { + static function &get( $id ) { + if (!array_key_exists( $id, self::$mObjects ) ) { $mw = new MagicWord(); $mw->load( $id ); - $wgMagicWords[$id] = $mw; + self::$mObjects[$id] = $mw; + } + return self::$mObjects[$id]; + } + + /** + * Get an array of parser variable IDs + */ + static function getVariableIDs() { + if ( !self::$mVariableIDsInitialised ) { + # Deprecated constant definition hook, available for extensions that need it + $magicWords = array(); + wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) ); + foreach ( $magicWords as $word ) { + define( $word, $word ); + } + + # Get variable IDs + wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) ); + self::$mVariableIDsInitialised = true; } - return $wgMagicWords[$id]; + return self::$mVariableIDs; } - + # Initialises this object with an ID function load( $id ) { - global $wgContLang; + global $wgContLang; $this->mId = $id; $wgContLang->getMagic( $this ); + if ( !$this->mSynonyms ) { + $this->mSynonyms = array( 'dkjsagfjsgashfajsh' ); + #throw new MWException( "Error: invalid magic word '$id'" ); + wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" ); + } } - + /** * Preliminary initialisation * @private @@ -147,16 +170,21 @@ class MagicWord { # This was used for matching "$1" variables, but different uses of the feature will have # different restrictions, which should be checked *after* the MagicWord has been matched, # not here. - IMSoP - $escSyn = array_map( 'preg_quote', $this->mSynonyms ); + + $escSyn = array(); + foreach ( $this->mSynonyms as $synonym ) + // In case a magic word contains /, like that's going to happen;) + $escSyn[] = preg_quote( $synonym, '/' ); $this->mBaseRegex = implode( '|', $escSyn ); - $case = $this->mCaseSensitive ? '' : 'i'; + + $case = $this->mCaseSensitive ? '' : 'iu'; $this->mRegex = "/{$this->mBaseRegex}/{$case}"; $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}"; $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex ); - $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", + $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", "/^(?:{$this->mBaseRegex})$/{$case}" ); } - + /** * Gets a regex representing matching the word */ @@ -167,6 +195,18 @@ class MagicWord { return $this->mRegex; } + /** + * Gets the regexp case modifier to use, i.e. i or nothing, to be used if + * one is using MagicWord::getBaseRegex(), otherwise it'll be included in + * the complete expression + */ + function getRegexCase() { + if ( $this->mRegex === '' ) + $this->initRegex(); + + return $this->mCaseSensitive ? '' : 'iu'; + } + /** * Gets a regex matching the word, if it is at the string start */ @@ -186,7 +226,7 @@ class MagicWord { } return $this->mBaseRegex; } - + /** * Returns true if the text contains the word * @return bool @@ -210,16 +250,20 @@ class MagicWord { * is one. */ function matchVariableStartToEnd( $text ) { + $matches = array(); $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches ); if ( $matchcount == 0 ) { return NULL; - } elseif ( count($matches) == 1 ) { - return $matches[0]; } else { - # multiple matched parts (variable match); some will be empty because of synonyms - # the variable will be the second non-empty one so remove any blank elements and re-sort the indices + # multiple matched parts (variable match); some will be empty because of + # synonyms. The variable will be the second non-empty one so remove any + # blank elements and re-sort the indices. + # See also bug 6526 + $matches = array_values(array_filter($matches)); - return $matches[1]; + + if ( count($matches) == 1 ) { return $matches[0]; } + else { return $matches[1]; } } } @@ -229,25 +273,31 @@ class MagicWord { * input string, removing all instances of the word */ function matchAndRemove( &$text ) { - global $wgMagicFound; - $wgMagicFound = false; - $text = preg_replace_callback( $this->getRegex(), 'pregRemoveAndRecord', $text ); - return $wgMagicFound; + $this->mFound = false; + $text = preg_replace_callback( $this->getRegex(), array( &$this, 'pregRemoveAndRecord' ), $text ); + return $this->mFound; } function matchStartAndRemove( &$text ) { - global $wgMagicFound; - $wgMagicFound = false; - $text = preg_replace_callback( $this->getRegexStart(), 'pregRemoveAndRecord', $text ); - return $wgMagicFound; - } + $this->mFound = false; + $text = preg_replace_callback( $this->getRegexStart(), array( &$this, 'pregRemoveAndRecord' ), $text ); + return $this->mFound; + } + /** + * Used in matchAndRemove() + * @private + **/ + function pregRemoveAndRecord( ) { + $this->mFound = true; + return ''; + } /** * Replaces the word with something else */ - function replace( $replacement, $subject ) { - $res = preg_replace( $this->getRegex(), $replacement, $subject ); + function replace( $replacement, $subject, $limit=-1 ) { + $res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit ); $this->mModified = !($res === $subject); return $res; } @@ -258,7 +308,6 @@ class MagicWord { * Input word must contain $1 */ function substituteCallback( $text, $callback ) { - $regex = $this->getVariableRegex(); $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text ); $this->mModified = !($res === $text); return $res; @@ -270,7 +319,7 @@ class MagicWord { function getVariableRegex() { if ( $this->mVariableRegex == '' ) { $this->initRegex(); - } + } return $this->mVariableRegex; } @@ -280,7 +329,7 @@ class MagicWord { function getVariableStartToEndRegex() { if ( $this->mVariableStartToEndRegex == '' ) { $this->initRegex(); - } + } return $this->mVariableStartToEndRegex; } @@ -291,8 +340,12 @@ class MagicWord { return $this->mSynonyms[$i]; } + function getSynonyms() { + return $this->mSynonyms; + } + /** - * Returns true if the last call to replace() or substituteCallback() + * Returns true if the last call to replace() or substituteCallback() * returned a modified text, otherwise false. */ function getWasModified(){ @@ -304,7 +357,7 @@ class MagicWord { * This method uses the php feature to do several replacements at the same time, * thereby gaining some efficiency. The result is placed in the out variable * $result. The return value is true if something was replaced. - * @static + * @static **/ function replaceMultiple( $magicarr, $subject, &$result ){ $search = array(); @@ -324,20 +377,190 @@ class MagicWord { * lookup in a list of magic words */ function addToArray( &$array, $value ) { + global $wgContLang; foreach ( $this->mSynonyms as $syn ) { - $array[$syn] = $value; + $array[$wgContLang->lc($syn)] = $value; } } + + function isCaseSensitive() { + return $this->mCaseSensitive; + } + + function getId() { + return $this->mId; + } } /** - * Used in matchAndRemove() - * @private - **/ -function pregRemoveAndRecord( $match ) { - global $wgMagicFound; - $wgMagicFound = true; - return ''; -} + * Class for handling an array of magic words + */ +class MagicWordArray { + var $names = array(); + var $hash; + var $baseRegex, $regex; -?> + function __construct( $names = array() ) { + $this->names = $names; + } + + /** + * Add a magic word by name + */ + public function add( $name ) { + global $wgContLang; + $this->names[] = $name; + $this->hash = $this->baseRegex = $this->regex = null; + } + + /** + * Add a number of magic words by name + */ + public function addArray( $names ) { + $this->names = array_merge( $this->names, array_values( $names ) ); + $this->hash = $this->baseRegex = $this->regex = null; + } + + /** + * Get a 2-d hashtable for this array + */ + function getHash() { + if ( is_null( $this->hash ) ) { + global $wgContLang; + $this->hash = array( 0 => array(), 1 => array() ); + foreach ( $this->names as $name ) { + $magic = MagicWord::get( $name ); + $case = intval( $magic->isCaseSensitive() ); + foreach ( $magic->getSynonyms() as $syn ) { + if ( !$case ) { + $syn = $wgContLang->lc( $syn ); + } + $this->hash[$case][$syn] = $name; + } + } + } + return $this->hash; + } + + /** + * Get the base regex + */ + function getBaseRegex() { + if ( is_null( $this->baseRegex ) ) { + $this->baseRegex = array( 0 => '', 1 => '' ); + foreach ( $this->names as $name ) { + $magic = MagicWord::get( $name ); + $case = intval( $magic->isCaseSensitive() ); + foreach ( $magic->getSynonyms() as $i => $syn ) { + $group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')'; + if ( $this->baseRegex[$case] === '' ) { + $this->baseRegex[$case] = $group; + } else { + $this->baseRegex[$case] .= '|' . $group; + } + } + } + } + return $this->baseRegex; + } + + /** + * Get an unanchored regex + */ + function getRegex() { + if ( is_null( $this->regex ) ) { + $base = $this->getBaseRegex(); + $this->regex = array( '', '' ); + if ( $this->baseRegex[0] !== '' ) { + $this->regex[0] = "/{$base[0]}/iuS"; + } + if ( $this->baseRegex[1] !== '' ) { + $this->regex[1] = "/{$base[1]}/S"; + } + } + return $this->regex; + } + + /** + * Get a regex for matching variables + */ + function getVariableRegex() { + return str_replace( "\\$1", "(.*?)", $this->getRegex() ); + } + + /** + * Get an anchored regex for matching variables + */ + function getVariableStartToEndRegex() { + $base = $this->getBaseRegex(); + $newRegex = array( '', '' ); + if ( $base[0] !== '' ) { + $newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" ); + } + if ( $base[1] !== '' ) { + $newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" ); + } + return $newRegex; + } + + /** + * Parse a match array from preg_match + */ + function parseMatch( $m ) { + reset( $m ); + while ( list( $key, $value ) = each( $m ) ) { + if ( $key === 0 || $value === '' ) { + continue; + } + $parts = explode( '_', $key, 2 ); + if ( count( $parts ) != 2 ) { + // This shouldn't happen + // continue; + throw new MWException( __METHOD__ . ': bad parameter name' ); + } + list( /* $synIndex */, $magicName ) = $parts; + $paramValue = next( $m ); + return array( $magicName, $paramValue ); + } + // This shouldn't happen either + throw new MWException( __METHOD__.': parameter not found' ); + return array( false, false ); + } + + /** + * Match some text, with parameter capture + * Returns an array with the magic word name in the first element and the + * parameter in the second element. + * Both elements are false if there was no match. + */ + public function matchVariableStartToEnd( $text ) { + global $wgContLang; + $regexes = $this->getVariableStartToEndRegex(); + foreach ( $regexes as $regex ) { + if ( $regex !== '' ) { + $m = false; + if ( preg_match( $regex, $text, $m ) ) { + return $this->parseMatch( $m ); + } + } + } + return array( false, false ); + } + + /** + * Match some text, without parameter capture + * Returns the magic word name, or false if there was no capture + */ + public function matchStartToEnd( $text ) { + $hash = $this->getHash(); + if ( isset( $hash[1][$text] ) ) { + return $hash[1][$text]; + } + global $wgContLang; + $lc = $wgContLang->lc( $text ); + if ( isset( $hash[0][$lc] ) ) { + return $hash[0][$lc]; + } + return false; + } +}