5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
8 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
14 * * Redistributions of source code must retain the above copyright notice, this list of
15 * conditions and the following disclaimer.
17 * * Redistributions in binary form must reproduce the above copyright notice, this list
18 * of conditions and the following disclaimer in the documentation and/or other materials
19 * provided with the distribution.
21 * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * to endorse or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
37 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
39 * @author Geoffrey Sneddon
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
/**
 * HTTP Response Parser
 *
 * A small state machine that splits a raw HTTP response into its version,
 * status code, reason phrase, headers and body. Each protected method named
 * after a state consumes some input at the current pointer and selects the
 * next state; parse() drives the machine until it emits or fails.
 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP version of the response (reset to '' when parsing fails)
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code of the response (reset to '' when parsing fails)
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase of the status line
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * Keys are lower-cased header names.
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method to call next, 'emit' once parsing is
	 * complete, or false once parsing has failed.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it reaches 'emit', fails, or runs out of
	 * input. On failure the public result properties are reset to empty
	 * values.
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// Each state is implemented by the method of the same name.
			$state = $this->state;
			$this->$state();
		}

		// The raw input is no longer needed once the machine has stopped.
		$this->data = '';

		// Stopping in 'body' means the input ended right after the headers,
		// which is a valid response with an empty body.
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or a space counts as LWS, and so does a line feed that is
	 * immediately followed by a tab or a space (a folded header line).
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}

		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}

		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the input to contain at least one line feed and to start with
	 * "HTTP/" (case-insensitive); otherwise parsing fails.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/')
		{
			$this->state = false;
			return;
		}

		$len = strspn($this->data, '0123456789.', 5);
		$this->http_version = substr($this->data, 5, $len);
		$this->position += 5 + $len;

		// More than one dot cannot be a valid version number.
		if (substr_count($this->http_version, '.') > 1)
		{
			$this->state = false;
			return;
		}

		$this->http_version = (float) $this->http_version;
		// Skip the blanks separating the version from the status code.
		$this->position += strspn($this->data, "\x09\x20", $this->position);
		$this->state = 'status';
	}

	/**
	 * Parse the status code
	 *
	 * Parsing fails when no digits are found at the pointer.
	 */
	protected function status()
	{
		$len = strspn($this->data, '0123456789', $this->position);
		if ($len)
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, trimmed of tabs, carriage
	 * returns and spaces, and moves the pointer past that line feed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if any), then decides whether the
	 * next line is the blank line ending the header block or another header.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// Look at the two bytes at the pointer. Taking substr() of the single
		// character at the pointer could never match a two-byte CRLF.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * A line without a colon is skipped; input that ends before a colon or
	 * line feed is found makes parsing fail.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (!isset($this->data[$this->position + $len]))
		{
			$this->state = false;
			return;
		}

		if ($this->data[$this->position + $len] === "\x0A")
		{
			// No colon on this line: leave the pointer on the line feed and
			// let new_line() handle it.
			$this->position += $len;
			$this->state = 'new_line';
		}
		else
		{
			$this->name = substr($this->data, $this->position, $len);
			$this->position += $len + 1;
			$this->state = 'value';
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Workaround for ETags: we have to include the quotes as
				// part of the tag.
				if (strtolower($this->name) === 'etag')
				{
					$this->value .= '"';
					$this->position++;
					$this->state = 'value_char';
					break;
				}
				$this->position++;
				$this->state = 'quote';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			default:
				$this->state = 'value_char';
				break;
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Copies everything up to the next tab, space, line feed or double quote.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Closing quote: back to the unquoted part of the value.
				$this->position++;
				$this->state = 'value';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			case '\\':
				$this->position++;
				$this->state = 'quote_escaped';
				break;

			default:
				$this->state = 'quote_char';
				break;
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Copies everything up to the next tab, space, line feed, double quote or
	 * backslash.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// Stay inside the quoted string so that quote() sees a following
		// backslash or closing quote; returning to 'value' here would bypass
		// quote_escaped() and leave escapes undecoded.
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character after the backslash is copied verbatim.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything from the pointer onwards is the body. A non-empty
	 * Transfer-Encoding header is removed and hands the body over to
	 * chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not look chunked it is left untouched.
	 */
	protected function chunked()
	{
		// A chunk header: hexadecimal size, optional extensions, CRLF.
		$chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

		if (!preg_match($chunk_header, trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($chunk_header, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec(trim($matches[1]));
			if (!is_int($length))
			{
				// hexdec() returns a float when the size exceeds the integer
				// range; such a size cannot describe a real chunk, so treat
				// the body as not chunked instead of passing it to substr().
				$this->state = 'emit';
				return;
			}

			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// Skip the chunk header, the chunk data and the CRLF after it.
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}
}