X-Git-Url: https://scripts.mit.edu/gitweb/autoinstalls/mediawiki.git/blobdiff_plain/19e297c21b10b1b8a3acad5e73fc71dcb35db44a..6932310fd58ebef145fa01eb76edf7150284d8ea:/includes/WebRequest.php diff --git a/includes/WebRequest.php b/includes/WebRequest.php index 940b693f..3d5e372c 100644 --- a/includes/WebRequest.php +++ b/includes/WebRequest.php @@ -1,9 +1,9 @@ - * http://www.mediawiki.org/ + * https://www.mediawiki.org/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,20 +23,32 @@ * @file */ +use MediaWiki\MediaWikiServices; +use MediaWiki\Session\Session; +use MediaWiki\Session\SessionId; +use MediaWiki\Session\SessionManager; + /** * The WebRequest class encapsulates getting at data passed in the - * URL or via a POSTed form, handling remove of "magic quotes" slashes, - * stripping illegal input characters and normalizing Unicode sequences. - * - * Usually this is used via a global singleton, $wgRequest. You should - * not create a second WebRequest object; make a FauxRequest object if - * you want to pass arbitrary data to some function in place of the web - * input. + * URL or via a POSTed form stripping illegal input characters and + * normalizing Unicode sequences. * * @ingroup HTTP */ class WebRequest { - protected $data, $headers = array(); + protected $data, $headers = []; + + /** + * Flag to make WebRequest::getHeader return an array of values. + * @since 1.26 + */ + const GETHEADER_LIST = 1; + + /** + * The unique request ID. + * @var string + */ + private static $reqId; /** * Lazy-init response object @@ -44,11 +56,40 @@ class WebRequest { */ private $response; + /** + * Cached client IP address + * @var string + */ + private $ip; + + /** + * The timestamp of the start of the request, with microsecond precision. + * @var float + */ + protected $requestTime; + + /** + * Cached URL protocol + * @var string + */ + protected $protocol; + + /** + * @var SessionId|null Session ID to use for this + * request. We can't save the session directly due to reference cycles not + * working too well (slow GC in Zend and never collected in HHVM). + */ + protected $sessionId = null; + + /** @var bool Whether this HTTP request is "safe" (even if it is an HTTP post) */ + protected $markedAsSafe = false; + + /** + * @codeCoverageIgnore + */ public function __construct() { - /// @todo Fixme: this preemptive de-quoting can interfere with other web libraries - /// and increases our memory footprint. It would be cleaner to do on - /// demand; but currently we have no wrapper for $_SERVER etc. - $this->checkMagicQuotes(); + $this->requestTime = isset( $_SERVER['REQUEST_TIME_FLOAT'] ) + ? $_SERVER['REQUEST_TIME_FLOAT'] : microtime( true ); // POST overrides GET data // We don't use $_REQUEST here to avoid interference from cookies... @@ -56,162 +97,274 @@ class WebRequest { } /** - * Check for title, action, and/or variant data in the URL - * and interpolate it into the GET variables. - * This should only be run after $wgContLang is available, - * as we may need the list of language variants to determine - * available variant URLs. + * Extract relevant query arguments from the http request uri's path + * to be merged with the normal php provided query arguments. + * Tries to use the REQUEST_URI data if available and parses it + * according to the wiki's configuration looking for any known pattern. + * + * If the REQUEST_URI is not provided we'll fall back on the PATH_INFO + * provided by the server if any and use that to set a 'title' parameter. + * + * @param string $want If this is not 'all', then the function + * will return an empty array if it determines that the URL is + * inside a rewrite path. + * + * @return array Any query arguments found in path matches. */ - public function interpolateTitle() { + public static function getPathInfo( $want = 'all' ) { global $wgUsePathInfo; + // PATH_INFO is mangled due to https://bugs.php.net/bug.php?id=31892 + // And also by Apache 2.x, double slashes are converted to single slashes. + // So we will use REQUEST_URI if possible. + $matches = []; + if ( !empty( $_SERVER['REQUEST_URI'] ) ) { + // Slurp out the path portion to examine... + $url = $_SERVER['REQUEST_URI']; + if ( !preg_match( '!^https?://!', $url ) ) { + $url = 'http://unused' . $url; + } + MediaWiki\suppressWarnings(); + $a = parse_url( $url ); + MediaWiki\restoreWarnings(); + if ( $a ) { + $path = isset( $a['path'] ) ? $a['path'] : ''; + + global $wgScript; + if ( $path == $wgScript && $want !== 'all' ) { + // Script inside a rewrite path? + // Abort to keep from breaking... + return $matches; + } - // bug 16019: title interpolation on API queries is useless and possible harmful - if ( defined( 'MW_API' ) ) { - return; - } + $router = new PathRouter; + + // Raw PATH_INFO style + $router->add( "$wgScript/$1" ); - if ( $wgUsePathInfo ) { - // PATH_INFO is mangled due to http://bugs.php.net/bug.php?id=31892 - // And also by Apache 2.x, double slashes are converted to single slashes. - // So we will use REQUEST_URI if possible. - $matches = array(); - - if ( !empty( $_SERVER['REQUEST_URI'] ) ) { - // Slurp out the path portion to examine... - $url = $_SERVER['REQUEST_URI']; - if ( !preg_match( '!^https?://!', $url ) ) { - $url = 'http://unused' . $url; + if ( isset( $_SERVER['SCRIPT_NAME'] ) + && preg_match( '/\.php5?/', $_SERVER['SCRIPT_NAME'] ) + ) { + # Check for SCRIPT_NAME, we handle index.php explicitly + # But we do have some other .php files such as img_auth.php + # Don't let root article paths clober the parsing for them + $router->add( $_SERVER['SCRIPT_NAME'] . "/$1" ); } - $a = parse_url( $url ); - if( $a ) { - $path = isset( $a['path'] ) ? $a['path'] : ''; - - global $wgScript; - if( $path == $wgScript ) { - // Script inside a rewrite path? - // Abort to keep from breaking... - return; - } - // Raw PATH_INFO style - $matches = $this->extractTitle( $path, "$wgScript/$1" ); - global $wgArticlePath; - if( !$matches && $wgArticlePath ) { - $matches = $this->extractTitle( $path, $wgArticlePath ); - } + global $wgArticlePath; + if ( $wgArticlePath ) { + $router->add( $wgArticlePath ); + } - global $wgActionPaths; - if( !$matches && $wgActionPaths ) { - $matches = $this->extractTitle( $path, $wgActionPaths, 'action' ); - } + global $wgActionPaths; + if ( $wgActionPaths ) { + $router->add( $wgActionPaths, [ 'action' => '$key' ] ); + } - global $wgVariantArticlePath, $wgContLang; - if( !$matches && $wgVariantArticlePath ) { - $variantPaths = array(); - foreach( $wgContLang->getVariants() as $variant ) { - $variantPaths[$variant] = - str_replace( '$2', $variant, $wgVariantArticlePath ); - } - $matches = $this->extractTitle( $path, $variantPaths, 'variant' ); - } + global $wgVariantArticlePath, $wgContLang; + if ( $wgVariantArticlePath ) { + $router->add( $wgVariantArticlePath, + [ 'variant' => '$2' ], + [ '$2' => $wgContLang->getVariants() ] + ); } - } elseif ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) { + + Hooks::run( 'WebRequestPathInfoRouter', [ $router ] ); + + $matches = $router->parse( $path ); + } + } elseif ( $wgUsePathInfo ) { + if ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) { // Mangled PATH_INFO - // http://bugs.php.net/bug.php?id=31892 + // https://bugs.php.net/bug.php?id=31892 // Also reported when ini_get('cgi.fix_pathinfo')==false $matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 ); - } elseif ( isset( $_SERVER['PATH_INFO'] ) && ($_SERVER['PATH_INFO'] != '') ) { + } elseif ( isset( $_SERVER['PATH_INFO'] ) && $_SERVER['PATH_INFO'] != '' ) { // Regular old PATH_INFO yay $matches['title'] = substr( $_SERVER['PATH_INFO'], 1 ); } - foreach( $matches as $key => $val) { - $this->data[$key] = $_GET[$key] = $_REQUEST[$key] = $val; - } } + + return $matches; } /** - * Internal URL rewriting function; tries to extract page title and, - * optionally, one other fixed parameter value from a URL path. + * Work out an appropriate URL prefix containing scheme and host, based on + * information detected from $_SERVER * - * @param $path string: the URL path given from the client - * @param $bases array: one or more URLs, optionally with $1 at the end - * @param $key string: if provided, the matching key in $bases will be - * passed on as the value of this URL parameter - * @return array of URL variables to interpolate; empty if no match + * @return string */ - private function extractTitle( $path, $bases, $key=false ) { - foreach( (array)$bases as $keyValue => $base ) { - // Find the part after $wgArticlePath - $base = str_replace( '$1', '', $base ); - $baseLen = strlen( $base ); - if( substr( $path, 0, $baseLen ) == $base ) { - $raw = substr( $path, $baseLen ); - if( $raw !== '' ) { - $matches = array( 'title' => rawurldecode( $raw ) ); - if( $key ) { - $matches[$key] = $keyValue; - } - return $matches; - } + public static function detectServer() { + global $wgAssumeProxiesUseDefaultProtocolPorts; + + $proto = self::detectProtocol(); + $stdPort = $proto === 'https' ? 443 : 80; + + $varNames = [ 'HTTP_HOST', 'SERVER_NAME', 'HOSTNAME', 'SERVER_ADDR' ]; + $host = 'localhost'; + $port = $stdPort; + foreach ( $varNames as $varName ) { + if ( !isset( $_SERVER[$varName] ) ) { + continue; + } + + $parts = IP::splitHostAndPort( $_SERVER[$varName] ); + if ( !$parts ) { + // Invalid, do not use + continue; + } + + $host = $parts[0]; + if ( $wgAssumeProxiesUseDefaultProtocolPorts && isset( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) ) { + // T72021: Assume that upstream proxy is running on the default + // port based on the protocol. We have no reliable way to determine + // the actual port in use upstream. + $port = $stdPort; + } elseif ( $parts[1] === false ) { + if ( isset( $_SERVER['SERVER_PORT'] ) ) { + $port = $_SERVER['SERVER_PORT']; + } // else leave it as $stdPort + } else { + $port = $parts[1]; } + break; } - return array(); + + return $proto . '://' . IP::combineHostAndPort( $host, $port, $stdPort ); } /** - * Recursively strips slashes from the given array; - * used for undoing the evil that is magic_quotes_gpc. + * Detect the protocol from $_SERVER. + * This is for use prior to Setup.php, when no WebRequest object is available. + * At other times, use the non-static function getProtocol(). * - * @param $arr array: will be modified - * @return array the original array + * @return string */ - private function &fix_magic_quotes( &$arr ) { - foreach( $arr as $key => $val ) { - if( is_array( $val ) ) { - $this->fix_magic_quotes( $arr[$key] ); - } else { - $arr[$key] = stripslashes( $val ); - } + public static function detectProtocol() { + if ( ( !empty( $_SERVER['HTTPS'] ) && $_SERVER['HTTPS'] !== 'off' ) || + ( isset( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) && + $_SERVER['HTTP_X_FORWARDED_PROTO'] === 'https' ) ) { + return 'https'; + } else { + return 'http'; } - return $arr; } /** - * If magic_quotes_gpc option is on, run the global arrays - * through fix_magic_quotes to strip out the stupid slashes. - * WARNING: This should only be done once! Running a second - * time could damage the values. - */ - private function checkMagicQuotes() { - $mustFixQuotes = function_exists( 'get_magic_quotes_gpc' ) - && get_magic_quotes_gpc(); - if( $mustFixQuotes ) { - $this->fix_magic_quotes( $_COOKIE ); - $this->fix_magic_quotes( $_ENV ); - $this->fix_magic_quotes( $_GET ); - $this->fix_magic_quotes( $_POST ); - $this->fix_magic_quotes( $_REQUEST ); - $this->fix_magic_quotes( $_SERVER ); + * Get the number of seconds to have elapsed since request start, + * in fractional seconds, with microsecond resolution. + * + * @return float + * @since 1.25 + */ + public function getElapsedTime() { + return microtime( true ) - $this->requestTime; + } + + /** + * Get the unique request ID. + * This is either the value of the UNIQUE_ID envvar (if present) or a + * randomly-generated 24-character string. + * + * @return string + * @since 1.27 + */ + public static function getRequestId() { + if ( !self::$reqId ) { + self::$reqId = isset( $_SERVER['UNIQUE_ID'] ) + ? $_SERVER['UNIQUE_ID'] : wfRandomString( 24 ); } + + return self::$reqId; + } + + /** + * Override the unique request ID. This is for sub-requests, such as jobs, + * that wish to use the same id but are not part of the same execution context. + * + * @param string $id + * @since 1.27 + */ + public static function overrideRequestId( $id ) { + self::$reqId = $id; + } + + /** + * Get the current URL protocol (http or https) + * @return string + */ + public function getProtocol() { + if ( $this->protocol === null ) { + $this->protocol = self::detectProtocol(); + } + return $this->protocol; + } + + /** + * Check for title, action, and/or variant data in the URL + * and interpolate it into the GET variables. + * This should only be run after $wgContLang is available, + * as we may need the list of language variants to determine + * available variant URLs. + */ + public function interpolateTitle() { + // T18019: title interpolation on API queries is useless and sometimes harmful + if ( defined( 'MW_API' ) ) { + return; + } + + $matches = self::getPathInfo( 'title' ); + foreach ( $matches as $key => $val ) { + $this->data[$key] = $_GET[$key] = $_REQUEST[$key] = $val; + } + } + + /** + * URL rewriting function; tries to extract page title and, + * optionally, one other fixed parameter value from a URL path. + * + * @param string $path The URL path given from the client + * @param array $bases One or more URLs, optionally with $1 at the end + * @param string|bool $key If provided, the matching key in $bases will be + * passed on as the value of this URL parameter + * @return array Array of URL variables to interpolate; empty if no match + */ + static function extractTitle( $path, $bases, $key = false ) { + foreach ( (array)$bases as $keyValue => $base ) { + // Find the part after $wgArticlePath + $base = str_replace( '$1', '', $base ); + $baseLen = strlen( $base ); + if ( substr( $path, 0, $baseLen ) == $base ) { + $raw = substr( $path, $baseLen ); + if ( $raw !== '' ) { + $matches = [ 'title' => rawurldecode( $raw ) ]; + if ( $key ) { + $matches[$key] = $keyValue; + } + return $matches; + } + } + } + return []; } /** * Recursively normalizes UTF-8 strings in the given array. * - * @param $data string or array - * @return cleaned-up version of the given + * @param string|array $data + * @return array|string Cleaned-up version of the given * @private */ - function normalizeUnicode( $data ) { - if( is_array( $data ) ) { - foreach( $data as $key => $val ) { + public function normalizeUnicode( $data ) { + if ( is_array( $data ) ) { + foreach ( $data as $key => $val ) { $data[$key] = $this->normalizeUnicode( $val ); } } else { global $wgContLang; - $data = $wgContLang->normalize( $data ); + $data = isset( $wgContLang ) ? + $wgContLang->normalize( $data ) : + UtfNormal\Validator::cleanUp( $data ); } return $data; } @@ -219,49 +372,74 @@ class WebRequest { /** * Fetch a value from the given array or return $default if it's not set. * - * @param $arr Array - * @param $name String - * @param $default Mixed + * @param array $arr + * @param string $name + * @param mixed $default * @return mixed */ private function getGPCVal( $arr, $name, $default ) { # PHP is so nice to not touch input data, except sometimes: - # http://us2.php.net/variables.external#language.variables.external.dot-in-names + # https://secure.php.net/variables.external#language.variables.external.dot-in-names # Work around PHP *feature* to avoid *bugs* elsewhere. $name = strtr( $name, '.', '_' ); - if( isset( $arr[$name] ) ) { + if ( isset( $arr[$name] ) ) { global $wgContLang; $data = $arr[$name]; - if( isset( $_GET[$name] ) && !is_array( $data ) ) { + if ( isset( $_GET[$name] ) && !is_array( $data ) ) { # Check for alternate/legacy character encoding. - if( isset( $wgContLang ) ) { + if ( isset( $wgContLang ) ) { $data = $wgContLang->checkTitleEncoding( $data ); } } $data = $this->normalizeUnicode( $data ); return $data; } else { - taint( $default ); return $default; } } + /** + * Fetch a scalar from the input without normalization, or return $default + * if it's not set. + * + * Unlike self::getVal(), this does not perform any normalization on the + * input value. + * + * @since 1.28 + * @param string $name + * @param string|null $default Optional default + * @return string|null + */ + public function getRawVal( $name, $default = null ) { + $name = strtr( $name, '.', '_' ); // See comment in self::getGPCVal() + if ( isset( $this->data[$name] ) && !is_array( $this->data[$name] ) ) { + $val = $this->data[$name]; + } else { + $val = $default; + } + if ( is_null( $val ) ) { + return $val; + } else { + return (string)$val; + } + } + /** * Fetch a scalar from the input or return $default if it's not set. * Returns a string. Arrays are discarded. Useful for * non-freeform text inputs (e.g. predefined internal text keys * selected by a drop-down menu). For freeform input, see getText(). * - * @param $name String - * @param $default String: optional default (or NULL) - * @return String + * @param string $name + * @param string $default Optional default (or null) + * @return string|null */ public function getVal( $name, $default = null ) { $val = $this->getGPCVal( $this->data, $name, $default ); - if( is_array( $val ) ) { + if ( is_array( $val ) ) { $val = $default; } - if( is_null( $val ) ) { + if ( is_null( $val ) ) { return $val; } else { return (string)$val; @@ -269,11 +447,11 @@ class WebRequest { } /** - * Set an aribtrary value into our get/post data. + * Set an arbitrary value into our get/post data. * - * @param $key String: key name to use - * @param $value Mixed: value to set - * @return Mixed: old value if one was present, null otherwise + * @param string $key Key name to use + * @param mixed $value Value to set + * @return mixed Old value if one was present, null otherwise */ public function setVal( $key, $value ) { $ret = isset( $this->data[$key] ) ? $this->data[$key] : null; @@ -281,18 +459,34 @@ class WebRequest { return $ret; } + /** + * Unset an arbitrary value from our get/post data. + * + * @param string $key Key name to use + * @return mixed Old value if one was present, null otherwise + */ + public function unsetVal( $key ) { + if ( !isset( $this->data[$key] ) ) { + $ret = null; + } else { + $ret = $this->data[$key]; + unset( $this->data[$key] ); + } + return $ret; + } + /** * Fetch an array from the input or return $default if it's not set. * If source was scalar, will return an array with a single element. - * If no source and no default, returns NULL. + * If no source and no default, returns null. * - * @param $name String - * @param $default Array: optional default (or NULL) - * @return Array + * @param string $name + * @param array $default Optional default (or null) + * @return array|null */ public function getArray( $name, $default = null ) { $val = $this->getGPCVal( $this->data, $name, $default ); - if( is_null( $val ) ) { + if ( is_null( $val ) ) { return null; } else { return (array)$val; @@ -302,16 +496,16 @@ class WebRequest { /** * Fetch an array of integers, or return $default if it's not set. * If source was scalar, will return an array with a single element. - * If no source and no default, returns NULL. + * If no source and no default, returns null. * If an array is returned, contents are guaranteed to be integers. * - * @param $name String - * @param $default Array: option default (or NULL) - * @return Array of ints + * @param string $name + * @param array $default Option default (or null) + * @return array Array of ints */ public function getIntArray( $name, $default = null ) { $val = $this->getArray( $name, $default ); - if( is_array( $val ) ) { + if ( is_array( $val ) ) { $val = array_map( 'intval', $val ); } return $val; @@ -322,12 +516,12 @@ class WebRequest { * Guaranteed to return an integer; non-numeric input will typically * return 0. * - * @param $name String - * @param $default Integer - * @return Integer + * @param string $name + * @param int $default + * @return int */ public function getInt( $name, $default = 0 ) { - return intval( $this->getVal( $name, $default ) ); + return intval( $this->getRawVal( $name, $default ) ); } /** @@ -335,40 +529,55 @@ class WebRequest { * Guaranteed to return an integer or null; non-numeric input will * typically return null. * - * @param $name String - * @return Integer + * @param string $name + * @return int|null */ public function getIntOrNull( $name ) { - $val = $this->getVal( $name ); + $val = $this->getRawVal( $name ); return is_numeric( $val ) ? intval( $val ) : null; } + /** + * Fetch a floating point value from the input or return $default if not set. + * Guaranteed to return a float; non-numeric input will typically + * return 0. + * + * @since 1.23 + * @param string $name + * @param float $default + * @return float + */ + public function getFloat( $name, $default = 0.0 ) { + return floatval( $this->getRawVal( $name, $default ) ); + } + /** * Fetch a boolean value from the input or return $default if not set. * Guaranteed to return true or false, with normal PHP semantics for * boolean interpretation of strings. * - * @param $name String - * @param $default Boolean - * @return Boolean + * @param string $name + * @param bool $default + * @return bool */ public function getBool( $name, $default = false ) { - return (bool)$this->getVal( $name, $default ); + return (bool)$this->getRawVal( $name, $default ); } - + /** * Fetch a boolean value from the input or return $default if not set. * Unlike getBool, the string "false" will result in boolean false, which is * useful when interpreting information sent from JavaScript. * - * @param $name String - * @param $default Boolean - * @return Boolean + * @param string $name + * @param bool $default + * @return bool */ public function getFuzzyBool( $name, $default = false ) { - return $this->getBool( $name, $default ) && strcasecmp( $this->getVal( $name ), 'false' ) !== 0; + return $this->getBool( $name, $default ) + && strcasecmp( $this->getRawVal( $name ), 'false' ) !== 0; } /** @@ -376,39 +585,36 @@ class WebRequest { * value is (even "0"). Return false if the named value is not set. * Example use is checking for the presence of check boxes in forms. * - * @param $name String - * @return Boolean + * @param string $name + * @return bool */ public function getCheck( $name ) { # Checkboxes and buttons are only present when clicked - # Presence connotes truth, abscense false - $val = $this->getVal( $name, null ); - return isset( $val ); + # Presence connotes truth, absence false + return $this->getRawVal( $name, null ) !== null; } /** * Fetch a text string from the given array or return $default if it's not - * set. Carriage returns are stripped from the text, and with some language - * modules there is an input transliteration applied. This should generally - * be used for form