X-Git-Url: https://scripts.mit.edu/gitweb/autoinstallsdev/mediawiki.git/blobdiff_plain/19e297c21b10b1b8a3acad5e73fc71dcb35db44a..6932310fd58ebef145fa01eb76edf7150284d8ea:/includes/MagicWord.php diff --git a/includes/MagicWord.php b/includes/MagicWord.php index 31d83332..6e7799a3 100644 --- a/includes/MagicWord.php +++ b/includes/MagicWord.php @@ -1,17 +1,35 @@ match( $text ) ) + * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc. + * + * @par Usage: + * @code + * if (MagicWord::get( 'redirect' )->match( $text ) ) { + * // some code + * } + * @endcode * * Possible future improvements: * * Simultaneous searching for a number of magic words @@ -20,22 +38,63 @@ * Please avoid reading the data out of one of these objects and then writing * special case code. If possible, add another match()-like function here. * - * To add magic words in an extension, use the LanguageGetMagic hook. For - * magic words which are also Parser variables, add a MagicWordwgVariableIDs + * To add magic words in an extension, use $magicWords in a file listed in + * $wgExtensionMessagesFiles[]. + * + * @par Example: + * @code + * $magicWords = []; + * + * $magicWords['en'] = [ + * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ], + * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ], + * ]; + * @endcode + * + * For magic words which are also Parser variables, add a MagicWordwgVariableIDs * hook. Use string keys. * * @ingroup Parser */ class MagicWord { - /**#@+ - * @private - */ - var $mId, $mSynonyms, $mCaseSensitive, $mRegex; - var $mRegexStart, $mBaseRegex, $mVariableRegex; - var $mModified, $mFound; + /**#@-*/ + + /** @var int */ + public $mId; + + /** @var array */ + public $mSynonyms; + + /** @var bool */ + public $mCaseSensitive; + + /** @var string */ + private $mRegex = ''; + + /** @var string */ + private $mRegexStart = ''; + + /** @var string */ + private $mRegexStartToEnd = ''; + + /** @var string */ + private $mBaseRegex = ''; + + /** @var string */ + private $mVariableRegex = ''; + + /** @var string */ + private $mVariableStartToEndRegex = ''; + + /** @var bool */ + private $mModified = false; + + /** @var bool */ + private $mFound = false; - static public $mVariableIDsInitialised = false; - static public $mVariableIDs = array( + public static $mVariableIDsInitialised = false; + public static $mVariableIDs = [ + '!', 'currentmonth', 'currentmonth1', 'currentmonthname', @@ -62,6 +121,7 @@ class MagicWord { 'numberoffiles', 'numberofedits', 'articlepath', + 'pageid', 'sitename', 'server', 'servername', @@ -73,6 +133,7 @@ class MagicWord { 'fullpagenamee', 'namespace', 'namespacee', + 'namespacenumber', 'currentweek', 'currentdow', 'localweek', @@ -85,6 +146,7 @@ class MagicWord { 'revisionyear', 'revisiontimestamp', 'revisionuser', + 'revisionsize', 'subpagename', 'subpagenamee', 'talkspace', @@ -99,18 +161,21 @@ class MagicWord { 'numberofactiveusers', 'numberofpages', 'currentversion', + 'rootpagename', + 'rootpagenamee', 'basepagename', 'basepagenamee', 'currenttimestamp', 'localtimestamp', 'directionmark', 'contentlanguage', + 'pagelanguage', 'numberofadmins', - 'numberofviews', - ); + 'cascadingsources', + ]; /* Array of caching hints for ParserCache */ - static public $mCacheTTLs = array ( + public static $mCacheTTLs = [ 'currentmonth' => 86400, 'currentmonth1' => 86400, 'currentmonthname' => 86400, @@ -148,11 +213,10 @@ class MagicWord { 'localtimestamp' => 3600, 'pagesinnamespace' => 3600, 'numberofadmins' => 3600, - 'numberofviews' => 3600, 'numberingroup' => 3600, - ); + ]; - static public $mDoubleUnderscoreIDs = array( + public static $mDoubleUnderscoreIDs = [ 'notoc', 'nogallery', 'forcetoc', @@ -166,58 +230,49 @@ class MagicWord { 'staticredirect', 'notitleconvert', 'nocontentconvert', - ); + ]; - static public $mSubstIDs = array( + public static $mSubstIDs = [ 'subst', 'safesubst', - ); + ]; - static public $mObjects = array(); - static public $mDoubleUnderscoreArray = null; + public static $mObjects = []; + public static $mDoubleUnderscoreArray = null; /**#@-*/ - function __construct($id = 0, $syn = array(), $cs = false) { + public function __construct( $id = 0, $syn = [], $cs = false ) { $this->mId = $id; $this->mSynonyms = (array)$syn; $this->mCaseSensitive = $cs; - $this->mRegex = ''; - $this->mRegexStart = ''; - $this->mVariableRegex = ''; - $this->mVariableStartToEndRegex = ''; - $this->mModified = false; } /** * Factory: creates an object representing an ID - * @static + * + * @param int $id + * + * @return MagicWord */ - static function &get( $id ) { - wfProfileIn( __METHOD__ ); + public static function &get( $id ) { if ( !isset( self::$mObjects[$id] ) ) { $mw = new MagicWord(); $mw->load( $id ); self::$mObjects[$id] = $mw; } - wfProfileOut( __METHOD__ ); return self::$mObjects[$id]; } /** * Get an array of parser variable IDs + * + * @return array */ - static function getVariableIDs() { + public static function getVariableIDs() { if ( !self::$mVariableIDsInitialised ) { - # Deprecated constant definition hook, available for extensions that need it - $magicWords = array(); - wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) ); - foreach ( $magicWords as $word ) { - define( $word, $word ); - } - # Get variable IDs - wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) ); + Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] ); self::$mVariableIDsInitialised = true; } return self::$mVariableIDs; @@ -225,23 +280,34 @@ class MagicWord { /** * Get an array of parser substitution modifier IDs + * @return array */ - static function getSubstIDs() { - return self::$mSubstIDs; + public static function getSubstIDs() { + return self::$mSubstIDs; } - /* Allow external reads of TTL array */ - static function getCacheTTL($id) { - if (array_key_exists($id,self::$mCacheTTLs)) { + /** + * Allow external reads of TTL array + * + * @param int $id + * @return int + */ + public static function getCacheTTL( $id ) { + if ( array_key_exists( $id, self::$mCacheTTLs ) ) { return self::$mCacheTTLs[$id]; } else { return -1; } } - /** Get a MagicWordArray of double-underscore entities */ - static function getDoubleUnderscoreArray() { + /** + * Get a MagicWordArray of double-underscore entities + * + * @return MagicWordArray + */ + public static function getDoubleUnderscoreArray() { if ( is_null( self::$mDoubleUnderscoreArray ) ) { + Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] ); self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs ); } return self::$mDoubleUnderscoreArray; @@ -252,18 +318,22 @@ class MagicWord { * For use in parser tests */ public static function clearCache() { - self::$mObjects = array(); + self::$mObjects = []; } - # Initialises this object with an ID - function load( $id ) { + /** + * Initialises this object with an ID + * + * @param int $id + * @throws MWException + */ + public function load( $id ) { global $wgContLang; $this->mId = $id; $wgContLang->getMagic( $this ); if ( !$this->mSynonyms ) { - $this->mSynonyms = array( 'dkjsagfjsgashfajsh' ); - #throw new MWException( "Error: invalid magic word '$id'" ); - wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" ); + $this->mSynonyms = [ 'brionmademeputthishere' ]; + throw new MWException( "Error: invalid magic word '$id'" ); } } @@ -271,32 +341,39 @@ class MagicWord { * Preliminary initialisation * @private */ - function initRegex() { + public function initRegex() { // Sort the synonyms by length, descending, so that the longest synonym // matches in precedence to the shortest $synonyms = $this->mSynonyms; - usort( $synonyms, array( $this, 'compareStringLength' ) ); + usort( $synonyms, [ $this, 'compareStringLength' ] ); - $escSyn = array(); - foreach ( $synonyms as $synonym ) + $escSyn = []; + foreach ( $synonyms as $synonym ) { // In case a magic word contains /, like that's going to happen;) $escSyn[] = preg_quote( $synonym, '/' ); + } $this->mBaseRegex = implode( '|', $escSyn ); $case = $this->mCaseSensitive ? '' : 'iu'; $this->mRegex = "/{$this->mBaseRegex}/{$case}"; $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}"; + $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}"; $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex ); $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", "/^(?:{$this->mBaseRegex})$/{$case}" ); } /** - * A comparison function that returns -1, 0 or 1 depending on whether the - * first string is longer, the same length or shorter than the second + * A comparison function that returns -1, 0 or 1 depending on whether the + * first string is longer, the same length or shorter than the second * string. + * + * @param string $s1 + * @param string $s2 + * + * @return int */ - function compareStringLength( $s1, $s2 ) { + public function compareStringLength( $s1, $s2 ) { $l1 = strlen( $s1 ); $l2 = strlen( $s2 ); if ( $l1 < $l2 ) { @@ -310,9 +387,11 @@ class MagicWord { /** * Gets a regex representing matching the word + * + * @return string */ - function getRegex() { - if ($this->mRegex == '' ) { + public function getRegex() { + if ( $this->mRegex == '' ) { $this->initRegex(); } return $this->mRegex; @@ -322,29 +401,49 @@ class MagicWord { * Gets the regexp case modifier to use, i.e. i or nothing, to be used if * one is using MagicWord::getBaseRegex(), otherwise it'll be included in * the complete expression + * + * @return string */ - function getRegexCase() { - if ( $this->mRegex === '' ) + public function getRegexCase() { + if ( $this->mRegex === '' ) { $this->initRegex(); + } return $this->mCaseSensitive ? '' : 'iu'; } /** * Gets a regex matching the word, if it is at the string start + * + * @return string */ - function getRegexStart() { - if ($this->mRegex == '' ) { + public function getRegexStart() { + if ( $this->mRegex == '' ) { $this->initRegex(); } return $this->mRegexStart; } + /** + * Gets a regex matching the word from start to end of a string + * + * @return string + * @since 1.23 + */ + public function getRegexStartToEnd() { + if ( $this->mRegexStartToEnd == '' ) { + $this->initRegex(); + } + return $this->mRegexStartToEnd; + } + /** * regex without the slashes and what not + * + * @return string */ - function getBaseRegex() { - if ($this->mRegex == '') { + public function getBaseRegex() { + if ( $this->mRegex == '' ) { $this->initRegex(); } return $this->mBaseRegex; @@ -352,28 +451,50 @@ class MagicWord { /** * Returns true if the text contains the word + * + * @param string $text + * * @return bool */ - function match( $text ) { + public function match( $text ) { return (bool)preg_match( $this->getRegex(), $text ); } /** * Returns true if the text starts with the word + * + * @param string $text + * * @return bool */ - function matchStart( $text ) { + public function matchStart( $text ) { return (bool)preg_match( $this->getRegexStart(), $text ); } + /** + * Returns true if the text matched the word + * + * @param string $text + * + * @return bool + * @since 1.23 + */ + public function matchStartToEnd( $text ) { + return (bool)preg_match( $this->getRegexStartToEnd(), $text ); + } + /** * Returns NULL if there's no match, the value of $1 otherwise * The return code is the matched string, if there's no variable * part in the regex and the matched variable part ($1) if there * is one. + * + * @param string $text + * + * @return string */ - function matchVariableStartToEnd( $text ) { - $matches = array(); + public function matchVariableStartToEnd( $text ) { + $matches = []; $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches ); if ( $matchcount == 0 ) { return null; @@ -381,47 +502,79 @@ class MagicWord { # multiple matched parts (variable match); some will be empty because of # synonyms. The variable will be the second non-empty one so remove any # blank elements and re-sort the indices. - # See also bug 6526 + # See also T8526 - $matches = array_values(array_filter($matches)); + $matches = array_values( array_filter( $matches ) ); - if ( count($matches) == 1 ) { return $matches[0]; } - else { return $matches[1]; } + if ( count( $matches ) == 1 ) { + return $matches[0]; + } else { + return $matches[1]; + } } } - /** * Returns true if the text matches the word, and alters the * input string, removing all instances of the word + * + * @param string &$text + * + * @return bool */ - function matchAndRemove( &$text ) { + public function matchAndRemove( &$text ) { $this->mFound = false; - $text = preg_replace_callback( $this->getRegex(), array( &$this, 'pregRemoveAndRecord' ), $text ); + $text = preg_replace_callback( + $this->getRegex(), + [ $this, 'pregRemoveAndRecord' ], + $text + ); + return $this->mFound; } - function matchStartAndRemove( &$text ) { + /** + * @param string &$text + * @return bool + */ + public function matchStartAndRemove( &$text ) { $this->mFound = false; - $text = preg_replace_callback( $this->getRegexStart(), array( &$this, 'pregRemoveAndRecord' ), $text ); + $text = preg_replace_callback( + $this->getRegexStart(), + [ $this, 'pregRemoveAndRecord' ], + $text + ); + return $this->mFound; } /** * Used in matchAndRemove() - * @private - **/ - function pregRemoveAndRecord( ) { + * + * @return string + */ + public function pregRemoveAndRecord() { $this->mFound = true; return ''; } /** * Replaces the word with something else - */ - function replace( $replacement, $subject, $limit=-1 ) { - $res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit ); - $this->mModified = !($res === $subject); + * + * @param string $replacement + * @param string $subject + * @param int $limit + * + * @return string + */ + public function replace( $replacement, $subject, $limit = -1 ) { + $res = preg_replace( + $this->getRegex(), + StringUtils::escapeRegexReplacement( $replacement ), + $subject, + $limit + ); + $this->mModified = $res !== $subject; return $res; } @@ -429,17 +582,24 @@ class MagicWord { * Variable handling: {{SUBST:xxx}} style words * Calls back a function to determine what to replace xxx with * Input word must contain $1 + * + * @param string $text + * @param callable $callback + * + * @return string */ - function substituteCallback( $text, $callback ) { + public function substituteCallback( $text, $callback ) { $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text ); - $this->mModified = !($res === $text); + $this->mModified = $res !== $text; return $res; } /** * Matches the word, where $1 is a wildcard + * + * @return string */ - function getVariableRegex() { + public function getVariableRegex() { if ( $this->mVariableRegex == '' ) { $this->initRegex(); } @@ -448,8 +608,10 @@ class MagicWord { /** * Matches the entire string, where $1 is a wildcard + * + * @return string */ - function getVariableStartToEndRegex() { + public function getVariableStartToEndRegex() { if ( $this->mVariableStartToEndRegex == '' ) { $this->initRegex(); } @@ -458,294 +620,57 @@ class MagicWord { /** * Accesses the synonym list directly + * + * @param int $i + * + * @return string */ - function getSynonym( $i ) { + public function getSynonym( $i ) { return $this->mSynonyms[$i]; } - function getSynonyms() { + /** + * @return array + */ + public function getSynonyms() { return $this->mSynonyms; } /** * Returns true if the last call to replace() or substituteCallback() * returned a modified text, otherwise false. + * + * @return bool */ - function getWasModified(){ + public function getWasModified() { return $this->mModified; } - /** - * $magicarr is an associative array of (magic word ID => replacement) - * This method uses the php feature to do several replacements at the same time, - * thereby gaining some efficiency. The result is placed in the out variable - * $result. The return value is true if something was replaced. - * @static - **/ - function replaceMultiple( $magicarr, $subject, &$result ){ - $search = array(); - $replace = array(); - foreach( $magicarr as $id => $replacement ){ - $mw = MagicWord::get( $id ); - $search[] = $mw->getRegex(); - $replace[] = $replacement; - } - - $result = preg_replace( $search, $replace, $subject ); - return !($result === $subject); - } - /** * Adds all the synonyms of this MagicWord to an array, to allow quick * lookup in a list of magic words + * + * @param array &$array + * @param string $value */ - function addToArray( &$array, $value ) { + public function addToArray( &$array, $value ) { global $wgContLang; foreach ( $this->mSynonyms as $syn ) { - $array[$wgContLang->lc($syn)] = $value; + $array[$wgContLang->lc( $syn )] = $value; } } - function isCaseSensitive() { - return $this->mCaseSensitive; - } - - function getId() { - return $this->mId; - } -} - -/** - * Class for handling an array of magic words - * @ingroup Parser - */ -class MagicWordArray { - var $names = array(); - var $hash; - var $baseRegex, $regex; - var $matches; - - function __construct( $names = array() ) { - $this->names = $names; - } - - /** - * Add a magic word by name - */ - public function add( $name ) { - $this->names[] = $name; - $this->hash = $this->baseRegex = $this->regex = null; - } - - /** - * Add a number of magic words by name - */ - public function addArray( $names ) { - $this->names = array_merge( $this->names, array_values( $names ) ); - $this->hash = $this->baseRegex = $this->regex = null; - } - - /** - * Get a 2-d hashtable for this array - */ - function getHash() { - if ( is_null( $this->hash ) ) { - global $wgContLang; - $this->hash = array( 0 => array(), 1 => array() ); - foreach ( $this->names as $name ) { - $magic = MagicWord::get( $name ); - $case = intval( $magic->isCaseSensitive() ); - foreach ( $magic->getSynonyms() as $syn ) { - if ( !$case ) { - $syn = $wgContLang->lc( $syn ); - } - $this->hash[$case][$syn] = $name; - } - } - } - return $this->hash; - } - - /** - * Get the base regex - */ - function getBaseRegex() { - if ( is_null( $this->baseRegex ) ) { - $this->baseRegex = array( 0 => '', 1 => '' ); - foreach ( $this->names as $name ) { - $magic = MagicWord::get( $name ); - $case = intval( $magic->isCaseSensitive() ); - foreach ( $magic->getSynonyms() as $i => $syn ) { - $group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')'; - if ( $this->baseRegex[$case] === '' ) { - $this->baseRegex[$case] = $group; - } else { - $this->baseRegex[$case] .= '|' . $group; - } - } - } - } - return $this->baseRegex; - } - - /** - * Get an unanchored regex that does not match parameters - */ - function getRegex() { - if ( is_null( $this->regex ) ) { - $base = $this->getBaseRegex(); - $this->regex = array( '', '' ); - if ( $this->baseRegex[0] !== '' ) { - $this->regex[0] = "/{$base[0]}/iuS"; - } - if ( $this->baseRegex[1] !== '' ) { - $this->regex[1] = "/{$base[1]}/S"; - } - } - return $this->regex; - } - - /** - * Get a regex for matching variables with parameters - */ - function getVariableRegex() { - return str_replace( "\\$1", "(.*?)", $this->getRegex() ); - } - - /** - * Get a regex anchored to the start of the string that does not match parameters - */ - function getRegexStart() { - $base = $this->getBaseRegex(); - $newRegex = array( '', '' ); - if ( $base[0] !== '' ) { - $newRegex[0] = "/^(?:{$base[0]})/iuS"; - } - if ( $base[1] !== '' ) { - $newRegex[1] = "/^(?:{$base[1]})/S"; - } - return $newRegex; - } - /** - * Get an anchored regex for matching variables with parameters - */ - function getVariableStartToEndRegex() { - $base = $this->getBaseRegex(); - $newRegex = array( '', '' ); - if ( $base[0] !== '' ) { - $newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" ); - } - if ( $base[1] !== '' ) { - $newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" ); - } - return $newRegex; - } - - /** - * Parse a match array from preg_match - * Returns array(magic word ID, parameter value) - * If there is no parameter value, that element will be false. - */ - function parseMatch( $m ) { - reset( $m ); - while ( list( $key, $value ) = each( $m ) ) { - if ( $key === 0 || $value === '' ) { - continue; - } - $parts = explode( '_', $key, 2 ); - if ( count( $parts ) != 2 ) { - // This shouldn't happen - // continue; - throw new MWException( __METHOD__ . ': bad parameter name' ); - } - list( /* $synIndex */, $magicName ) = $parts; - $paramValue = next( $m ); - return array( $magicName, $paramValue ); - } - // This shouldn't happen either - throw new MWException( __METHOD__.': parameter not found' ); - } - - /** - * Match some text, with parameter capture - * Returns an array with the magic word name in the first element and the - * parameter in the second element. - * Both elements are false if there was no match. - */ - public function matchVariableStartToEnd( $text ) { - $regexes = $this->getVariableStartToEndRegex(); - foreach ( $regexes as $regex ) { - if ( $regex !== '' ) { - $m = false; - if ( preg_match( $regex, $text, $m ) ) { - return $this->parseMatch( $m ); - } - } - } - return array( false, false ); - } - - /** - * Match some text, without parameter capture - * Returns the magic word name, or false if there was no capture - */ - public function matchStartToEnd( $text ) { - $hash = $this->getHash(); - if ( isset( $hash[1][$text] ) ) { - return $hash[1][$text]; - } - global $wgContLang; - $lc = $wgContLang->lc( $text ); - if ( isset( $hash[0][$lc] ) ) { - return $hash[0][$lc]; - } - return false; - } - - /** - * Returns an associative array, ID => param value, for all items that match - * Removes the matched items from the input string (passed by reference) + * @return bool */ - public function matchAndRemove( &$text ) { - $found = array(); - $regexes = $this->getRegex(); - foreach ( $regexes as $regex ) { - if ( $regex === '' ) { - continue; - } - preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ); - foreach ( $matches as $m ) { - list( $name, $param ) = $this->parseMatch( $m ); - $found[$name] = $param; - } - $text = preg_replace( $regex, '', $text ); - } - return $found; + public function isCaseSensitive() { + return $this->mCaseSensitive; } /** - * Return the ID of the magic word at the start of $text, and remove - * the prefix from $text. - * Return false if no match found and $text is not modified. - * Does not match parameters. + * @return int */ - public function matchStartAndRemove( &$text ) { - $regexes = $this->getRegexStart(); - foreach ( $regexes as $regex ) { - if ( $regex === '' ) { - continue; - } - if ( preg_match( $regex, $text, $m ) ) { - list( $id, ) = $this->parseMatch( $m ); - if ( strlen( $m[0] ) >= strlen( $text ) ) { - $text = ''; - } else { - $text = substr( $text, strlen( $m[0] ) ); - } - return $id; - } - } - return false; + public function getId() { + return $this->mId; } }