scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/SimplePie/HTTP/Parser.php
Wordpress 3.5.2
[autoinstalls/wordpress.git] / wp-includes / SimplePie / HTTP / Parser.php
1 <?php
2 /**
3  * SimplePie
4  *
5  * A PHP-Based RSS and Atom Feed Framework.
6  * Takes the hard work out of managing a complete RSS/Atom solution.
7  *
8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without modification, are
12  * permitted provided that the following conditions are met:
13  *
14  *      * Redistributions of source code must retain the above copyright notice, this list of
15  *        conditions and the following disclaimer.
16  *
17  *      * Redistributions in binary form must reproduce the above copyright notice, this list
18  *        of conditions and the following disclaimer in the documentation and/or other materials
19  *        provided with the distribution.
20  *
21  *      * Neither the name of the SimplePie Team nor the names of its contributors may be used
22  *        to endorse or promote products derived from this software without specific prior
23  *        written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  *
35  * @package SimplePie
36  * @version 1.3.1
37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38  * @author Ryan Parman
39  * @author Geoffrey Sneddon
40  * @author Ryan McCue
41  * @link http://simplepie.org/ SimplePie
42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43  */
44
45
46 /**
47  * HTTP Response Parser
48  *
49  * @package SimplePie
50  * @subpackage HTTP
51  */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * Keys are lower-cased header names; repeated headers are joined with
	 * ", " (except Content-Type, where the last one wins).
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method to call next, 'emit' once parsing is
	 * complete, or false when the input was rejected.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it reaches 'emit', is rejected (state
	 * becomes false) or the input is exhausted. Ending in the 'body' state
	 * is also a success: it means the headers were terminated but no body
	 * bytes followed.
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// Each state is implemented by the method of the same name.
			$state = $this->state;
			$this->$state();
		}

		// The raw input is no longer needed once parsing has stopped.
		$this->data = '';

		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		// Failure: discard anything that was partially parsed.
		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or space counts as LWS, as does a line feed that is immediately
	 * followed by a tab or space (a folded header line).
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}

		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}

		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the input to contain at least one line feed and to start
	 * with "HTTP/" (case-insensitive); otherwise parsing is aborted.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
		{
			$len = strspn($this->data, '0123456789.', 5);
			$this->http_version = substr($this->data, 5, $len);
			$this->position += 5 + $len;
			if (substr_count($this->http_version, '.') <= 1)
			{
				$this->http_version = (float) $this->http_version;
				// Skip the whitespace separating the version from the status code.
				$this->position += strspn($this->data, "\x09\x20", $this->position);
				$this->state = 'status';
			}
			else
			{
				// More than one dot cannot be a valid version number.
				$this->state = false;
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the status code
	 *
	 * Aborts parsing when no digits are found at the current position.
	 */
	protected function status()
	{
		if ($len = strspn($this->data, '0123456789', $this->position))
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, trimmed of tabs, spaces
	 * and carriage returns, and moves the pointer past the line feed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if any), then decides whether the
	 * next line is the blank line that ends the headers or another header.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// Look at the next two bytes of the input (not a substring of a
		// single character) so that a CRLF blank line is actually detected.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. A line without a colon is
	 * skipped; running out of data before either is found aborts parsing.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (isset($this->data[$this->position + $len]))
		{
			if ($this->data[$this->position + $len] === "\x0A")
			{
				// No colon on this line: leave the pointer on the line feed
				// and let new_line() deal with it.
				$this->position += $len;
				$this->state = 'new_line';
			}
			else
			{
				$this->name = substr($this->data, $this->position, $len);
				// Skip the name and the colon.
				$this->position += $len + 1;
				$this->state = 'value';
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					// Workaround for ETags: we have to include the quotes as
					// part of the tag.
					if (strtolower($this->name) === 'etag')
					{
						$this->value .= '"';
						$this->position++;
						$this->state = 'value_char';
						break;
					}
					$this->position++;
					$this->state = 'quote';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				default:
					$this->state = 'value_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Copies characters up to the next tab, space, line feed or double
	 * quote, then hands control back to value().
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					// Closing quote: back to the unquoted part of the value.
					$this->position++;
					$this->state = 'value';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				case '\\':
					$this->position++;
					$this->state = 'quote_escaped';
					break;

				default:
					$this->state = 'quote_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Copies characters up to the next tab, space, line feed, double quote
	 * or backslash. Control returns to quote() (not value()) because the
	 * parser is still inside the quoted string; otherwise backslash escapes
	 * after the first run of characters would never be processed.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character following the backslash is appended verbatim.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything from the pointer onwards is the body. When a non-empty
	 * Transfer-Encoding header was seen, the header is removed and the body
	 * is handed to chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not look chunked, or stops looking chunked part-way
	 * through, it is left untouched. Trailer headers after the final chunk
	 * are ignored.
	 */
	protected function chunked()
	{
		// Chunk header: hex length, optional extensions, then CRLF.
		$chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

		if (!preg_match($chunk_header, trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($chunk_header, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec(trim($matches[1]));
			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// Skip the chunk header, the chunk data and the CRLF that follows it.
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}
}