]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/libs/MultiHttpClient.php
MediaWiki 1.30.2-scripts
[autoinstalls/mediawiki.git] / includes / libs / MultiHttpClient.php
1 <?php
2 /**
3  * HTTP service client
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  * http://www.gnu.org/copyleft/gpl.html
19  *
20  * @file
21  */
22
23 use Psr\Log\LoggerAwareInterface;
24 use Psr\Log\LoggerInterface;
25 use Psr\Log\NullLogger;
26
/**
 * Class to handle concurrent HTTP requests
 *
 * HTTP request maps are arrays that use the following format:
 *   - method   : GET/HEAD/PUT/POST/DELETE
 *   - url      : HTTP/HTTPS URL
 *   - query    : <query parameter field/value associative array> (uses RFC 3986)
 *   - headers  : <header name/value associative array>
 *   - body     : source to get the HTTP request body from;
 *                this can simply be a string (always), a resource for
 *                PUT requests, and a field/value array for POST requests;
 *                array bodies are encoded as multipart/form-data and strings
 *                use application/x-www-form-urlencoded (headers sent automatically)
 *   - stream   : resource to stream the HTTP response body to
 *   - proxy    : HTTP proxy to use
 *   - flags    : map of boolean flags which supports:
 *                  - relayResponseHeaders : write out header via header()
 * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
 *
 * @since 1.23
 */
class MultiHttpClient implements LoggerAwareInterface {
        /** @var resource|null curl_multi handle, created lazily by getCurlMulti() */
        protected $multiHandle = null;
        /** @var string|null SSL certificates path */
        protected $caBundlePath;
        /** @var int Default connection timeout (seconds) */
        protected $connTimeout = 10;
        /** @var int Default request timeout (seconds) */
        protected $reqTimeout = 300;
        /** @var bool Whether to use HTTP pipelining by default */
        protected $usePipelining = false;
        /** @var int Default connection limit; also used as the batch size in runMulti() */
        protected $maxConnsPerHost = 50;
        /** @var string|null Default HTTP proxy */
        protected $proxy;
        /** @var string Default User-Agent header value */
        protected $userAgent = 'wikimedia/multi-http-client v1.0';
        /** @var LoggerInterface */
        protected $logger;

        /**
         * @param array $options
         *   - caBundlePath    : path to an SSL CA bundle; must exist on disk
         *   - connTimeout     : default connection timeout (seconds)
         *   - reqTimeout      : default request timeout (seconds)
         *   - proxy           : HTTP proxy to use
         *   - usePipelining   : whether to use HTTP pipelining if possible (for all hosts)
         *   - maxConnsPerHost : maximum number of concurrent connections (per host)
         *   - userAgent       : The User-Agent header value to send
         *   - logger          : LoggerInterface instance; a NullLogger is used if omitted
         * @throws Exception If 'caBundlePath' is given but the file does not exist
         */
        public function __construct( array $options ) {
                if ( isset( $options['caBundlePath'] ) ) {
                        $this->caBundlePath = $options['caBundlePath'];
                        if ( !file_exists( $this->caBundlePath ) ) {
                                throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
                        }
                }
                // Options that map one-to-one onto same-named properties
                static $opts = [
                        'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost',
                        'proxy', 'userAgent', 'logger'
                ];
                foreach ( $opts as $key ) {
                        if ( isset( $options[$key] ) ) {
                                $this->$key = $options[$key];
                        }
                }
                if ( $this->logger === null ) {
                        $this->logger = new NullLogger;
                }
        }

        /**
         * Execute an HTTP(S) request
         *
         * This method returns a response map of:
         *   - code    : HTTP response code or 0 if there was a serious cURL error
         *   - reason  : HTTP response reason (empty if there was a serious cURL error)
         *   - headers : <header name/value associative array>
         *   - body    : HTTP response body (stays an empty string if "stream" was set,
         *               since the data is written to that stream instead)
         *   - error   : Any cURL error string
         * The map also stores integer-indexed copies of these values. This lets callers do:
         * @code
         *              list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req );
         * @endcode
         * @param array $req HTTP request array
         * @param array $opts
         *   - connTimeout    : connection timeout per request (seconds)
         *   - reqTimeout     : post-connection timeout per request (seconds)
         * @return array Response array for request
         */
        public function run( array $req, array $opts = [] ) {
                return $this->runMulti( [ $req ], $opts )[0]['response'];
        }

        /**
         * Execute a set of HTTP(S) requests concurrently
         *
         * The maps are returned by this method with the 'response' field set to a map of:
         *   - code    : HTTP response code or 0 if there was a serious cURL error
         *   - reason  : HTTP response reason (empty if there was a serious cURL error)
         *   - headers : <header name/value associative array>
         *   - body    : HTTP response body (stays an empty string if "stream" was set,
         *               since the data is written to that stream instead)
         *   - error   : Any cURL error string
         * The map also stores integer-indexed copies of these values. This lets callers do:
         * @code
         *        list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response'];
         * @endcode
         * All headers in the 'headers' field are normalized to use lower case names.
         * This is true for the request headers and the response headers. Integer-indexed
         * method/URL entries will also be changed to use the corresponding string keys.
         *
         * @param array $reqs Map of HTTP request arrays
         * @param array $opts
         *   - connTimeout     : connection timeout per request (seconds)
         *   - reqTimeout      : post-connection timeout per request (seconds)
         *   - usePipelining   : whether to use HTTP pipelining if possible
         *   - maxConnsPerHost : maximum number of concurrent connections (per host)
         * @return array $reqs With response array populated for each
         * @throws Exception If a request lacks 'method'/'url' or is otherwise malformed
         */
        public function runMulti( array $reqs, array $opts = [] ) {
                $chm = $this->getCurlMulti();

                // Connection reuse is only forbidden when several requests share this call
                $forbidReuse = count( $reqs ) > 1;

                // Normalize $reqs and add all of the required cURL handles...
                $handles = [];
                foreach ( $reqs as $index => &$req ) {
                        $req['response'] = [
                                'code'     => 0,
                                'reason'   => '',
                                'headers'  => [],
                                'body'     => '',
                                'error'    => ''
                        ];
                        if ( isset( $req[0] ) ) {
                                $req['method'] = $req[0]; // short-form
                                unset( $req[0] );
                        }
                        if ( isset( $req[1] ) ) {
                                $req['url'] = $req[1]; // short-form
                                unset( $req[1] );
                        }
                        if ( !isset( $req['method'] ) ) {
                                throw new Exception( "Request has no 'method' field set." );
                        } elseif ( !isset( $req['url'] ) ) {
                                throw new Exception( "Request has no 'url' field set." );
                        }
                        $this->logger->debug( "{$req['method']}: {$req['url']}" );
                        $req['query'] = isset( $req['query'] ) ? $req['query'] : [];
                        $headers = []; // normalized headers
                        if ( isset( $req['headers'] ) ) {
                                foreach ( $req['headers'] as $name => $value ) {
                                        $headers[strtolower( $name )] = $value;
                                }
                        }
                        $req['headers'] = $headers;
                        if ( !isset( $req['body'] ) ) {
                                $req['body'] = '';
                                $req['headers']['content-length'] = 0;
                        }
                        $req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
                        // $req is passed by reference so that the cURL callbacks created in
                        // getCurlHandle() write straight into this entry of $reqs
                        $handles[$index] = $this->getCurlHandle( $req, $opts );
                        if ( $forbidReuse ) {
                                // https://github.com/guzzle/guzzle/issues/349
                                curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
                        }
                }
                unset( $req ); // don't assign over this by accident

                $indexes = array_keys( $reqs );
                if ( isset( $opts['usePipelining'] ) ) {
                        curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
                }
                if ( isset( $opts['maxConnsPerHost'] ) ) {
                        // Keep these sockets around as they may be needed later in the request
                        curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
                }

                // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
                // array_chunk() rejects sizes below 1, so clamp a misconfigured limit
                $batches = array_chunk( $indexes, max( 1, (int)$this->maxConnsPerHost ) );
                $infos = [];

                foreach ( $batches as $batch ) {
                        // Attach all cURL handles for this batch
                        foreach ( $batch as $index ) {
                                curl_multi_add_handle( $chm, $handles[$index] );
                        }
                        // Execute the cURL handles concurrently...
                        $active = null; // handles still being processed
                        do {
                                // Do any available work...
                                do {
                                        $mrc = curl_multi_exec( $chm, $active );
                                        // Several transfers can finish during one exec call, so drain
                                        // the whole message queue; reading a single message per pass
                                        // loses results once $active drops to zero
                                        while ( ( $info = curl_multi_info_read( $chm ) ) !== false ) {
                                                $infos[(int)$info['handle']] = $info;
                                        }
                                } while ( $mrc == CURLM_CALL_MULTI_PERFORM );
                                // Wait (if possible) for available work...
                                if ( $active > 0 && $mrc == CURLM_OK ) {
                                        if ( curl_multi_select( $chm, 10 ) == -1 ) {
                                                // PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
                                                usleep( 5000 ); // 5ms
                                        }
                                }
                        } while ( $active > 0 && $mrc == CURLM_OK );
                }

                // Remove all of the added cURL handles and check for errors...
                foreach ( $reqs as $index => &$req ) {
                        $ch = $handles[$index];
                        curl_multi_remove_handle( $chm, $ch );

                        if ( isset( $infos[(int)$ch] ) ) {
                                $info = $infos[(int)$ch];
                                $errno = $info['result'];
                                if ( $errno !== 0 ) {
                                        $req['response']['error'] = "(curl error: $errno)";
                                        if ( function_exists( 'curl_strerror' ) ) {
                                                $req['response']['error'] .= " " . curl_strerror( $errno );
                                        }
                                        $this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
                                                $req['response']['error'] );
                                }
                        } else {
                                $req['response']['error'] = "(curl error: no status set)";
                        }

                        // For convenience with the list() operator
                        $req['response'][0] = $req['response']['code'];
                        $req['response'][1] = $req['response']['reason'];
                        $req['response'][2] = $req['response']['headers'];
                        $req['response'][3] = $req['response']['body'];
                        $req['response'][4] = $req['response']['error'];
                        curl_close( $ch );
                        // Close any string wrapper file handles
                        if ( isset( $req['_closeHandle'] ) ) {
                                fclose( $req['_closeHandle'] );
                                unset( $req['_closeHandle'] );
                        }
                }
                unset( $req ); // don't assign over this by accident

                // Restore the default settings
                curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
                curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );

                return $reqs;
        }

        /**
         * Build a configured cURL easy handle for one normalized request map
         *
         * The callbacks registered on the handle capture $req by reference and fill in
         * $req['response'] while the transfer runs. For PUT requests with a string body,
         * a temporary stream is stored in $req['_closeHandle'] for the caller to close.
         *
         * @param array &$req HTTP request map
         * @param array $opts
         *   - connTimeout    : default connection timeout
         *   - reqTimeout     : default request timeout
         * @return resource
         * @throws Exception If a resource PUT body has neither a 'content-length' nor a
         *   chunked 'transfer-encoding' header, if a body is given for a non PUT/POST
         *   request, or if a header name contains ':'
         */
        protected function getCurlHandle( array &$req, array $opts = [] ) {
                $ch = curl_init();

                curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
                        isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
                curl_setopt( $ch, CURLOPT_PROXY, isset( $req['proxy'] ) ? $req['proxy'] : $this->proxy );
                curl_setopt( $ch, CURLOPT_TIMEOUT,
                        isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
                curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
                curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
                curl_setopt( $ch, CURLOPT_HEADER, 0 );
                if ( !is_null( $this->caBundlePath ) ) {
                        curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
                        curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
                }
                curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );

                // Append the 'query' map to whatever query string the URL already has
                $url = $req['url'];
                $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
                if ( $query != '' ) {
                        $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
                }
                curl_setopt( $ch, CURLOPT_URL, $url );

                curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
                if ( $req['method'] === 'HEAD' ) {
                        curl_setopt( $ch, CURLOPT_NOBODY, 1 );
                }

                if ( $req['method'] === 'PUT' ) {
                        curl_setopt( $ch, CURLOPT_PUT, 1 );
                        if ( is_resource( $req['body'] ) ) {
                                curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
                                // "chunked" is the actual HTTP token; the misspelt "chunks" is still
                                // accepted so that existing callers keep working
                                $isChunked = isset( $req['headers']['transfer-encoding'] ) && in_array(
                                        strtolower( trim( $req['headers']['transfer-encoding'] ) ),
                                        [ 'chunked', 'chunks' ],
                                        true
                                );
                                if ( isset( $req['headers']['content-length'] ) ) {
                                        curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
                                } elseif ( $isChunked ) {
                                        curl_setopt( $ch, CURLOPT_UPLOAD, true );
                                } else {
                                        throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
                                }
                        } elseif ( $req['body'] !== '' ) {
                                // Wrap the string body in a temporary stream that cURL can read from
                                $fp = fopen( "php://temp", "wb+" );
                                fwrite( $fp, $req['body'], strlen( $req['body'] ) );
                                rewind( $fp );
                                curl_setopt( $ch, CURLOPT_INFILE, $fp );
                                curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
                                $req['_closeHandle'] = $fp; // remember to close this later
                        } else {
                                curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
                        }
                        curl_setopt( $ch, CURLOPT_READFUNCTION,
                                function ( $ch, $fd, $length ) {
                                        return fread( $fd, $length );
                                }
                        );
                } elseif ( $req['method'] === 'POST' ) {
                        curl_setopt( $ch, CURLOPT_POST, 1 );
                        // Don't interpret POST parameters starting with '@' as file uploads, because this
                        // makes it impossible to POST plain values starting with '@' (and causes security
                        // issues potentially exposing the contents of local files).
                        // The PHP manual says this option was introduced in PHP 5.5 defaults to true in PHP 5.6,
                        // but we support lower versions, and the option doesn't exist in HHVM 5.6.99.
                        if ( defined( 'CURLOPT_SAFE_UPLOAD' ) ) {
                                curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true );
                        } elseif ( is_array( $req['body'] ) ) {
                                // In PHP 5.2 and later, '@' is interpreted as a file upload if POSTFIELDS
                                // is an array, but not if it's a string. So convert $req['body'] to a string
                                // for safety.
                                $req['body'] = http_build_query( $req['body'] );
                        }
                        curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
                } else {
                        if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
                                throw new Exception( "HTTP body specified for a non PUT/POST request." );
                        }
                        $req['headers']['content-length'] = 0;
                }

                if ( !isset( $req['headers']['user-agent'] ) ) {
                        $req['headers']['user-agent'] = $this->userAgent;
                }

                $headers = [];
                foreach ( $req['headers'] as $name => $value ) {
                        // Strict comparison: strpos() yields 0 (falsy) for a match at the very start
                        if ( strpos( $name, ':' ) !== false ) {
                                throw new Exception( "Headers cannot have ':' in the name." );
                        }
                        $headers[] = $name . ': ' . trim( $value );
                }
                curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

                curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
                        function ( $ch, $header ) use ( &$req ) {
                                if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
                                        header( $header );
                                }
                                $length = strlen( $header );
                                $matches = [];
                                // Status line; accepts any HTTP version (e.g. "HTTP/2 200") and an
                                // absent reason phrase, not just HTTP/1.0 and HTTP/1.1
                                if ( preg_match( "/^HTTP\/\d+(?:\.\d+)? (\d{3})(?:\s+(.*))?$/", $header, $matches ) ) {
                                        $req['response']['code'] = (int)$matches[1];
                                        $req['response']['reason'] = isset( $matches[2] ) ? trim( $matches[2] ) : '';
                                        return $length;
                                }
                                if ( strpos( $header, ":" ) === false ) {
                                        return $length;
                                }
                                list( $name, $value ) = explode( ":", $header, 2 );
                                $req['response']['headers'][strtolower( $name )] = trim( $value );
                                return $length;
                        }
                );

                if ( isset( $req['stream'] ) ) {
                        // Don't just use CURLOPT_FILE as that might give:
                        // curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
                        // The callback here handles both normal files and php://temp handles.
                        curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
                                function ( $ch, $data ) use ( &$req ) {
                                        return fwrite( $req['stream'], $data );
                                }
                        );
                } else {
                        curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
                                function ( $ch, $data ) use ( &$req ) {
                                        $req['response']['body'] .= $data;
                                        return strlen( $data );
                                }
                        );
                }

                return $ch;
        }

        /**
         * Get the shared curl_multi handle, creating it with the default
         * pipelining and connection settings on first use
         *
         * @return resource
         */
        protected function getCurlMulti() {
                if ( !$this->multiHandle ) {
                        $cmh = curl_multi_init();
                        curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
                        curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
                        $this->multiHandle = $cmh;
                }
                return $this->multiHandle;
        }

        /**
         * Register a logger
         *
         * @param LoggerInterface $logger
         */
        public function setLogger( LoggerInterface $logger ) {
                $this->logger = $logger;
        }

        /**
         * Close the curl_multi handle if one was ever created
         */
        public function __destruct() {
                if ( $this->multiHandle ) {
                        curl_multi_close( $this->multiHandle );
                }
        }
}