3 * Snoopy - the PHP net client
4 * @author Monte Ohrt <monte@ispi.net>
5 * @copyright 1999-2000 ispi, all rights reserved
7 * @license GNU Lesser GPL
8 * @link http://snoopy.sourceforge.net/
12 if ( !in_array('Snoopy', get_declared_classes() ) ) :
14 * Snoopy - the PHP net client
16 * @author Monte Ohrt <monte@ispi.net>
17 * @copyright (c): 1999-2000 ispi, all rights reserved
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 * You may contact the author of Snoopy by e-mail at:
40 * 237 S. 70th suite 220
43 * @link http://snoopy.sourceforge.net/ The latest version of Snoopy can be
48 /**** Public variables ****/
50 /* user definable vars */
52 var $host = "www.php.net"; // host name we are connecting to
53 var $port = 80; // port we are connecting to
54 var $proxy_host = ""; // proxy host to use
55 var $proxy_port = ""; // proxy port to use
56 var $proxy_user = ""; // proxy user to use
57 var $proxy_pass = ""; // proxy password to use
59 var $agent = "Snoopy v1.2.3"; // agent we masquerade as
60 var $referer = ""; // referer info to pass
61 var $cookies = array(); // array of cookies to pass
62 // $cookies["username"]="joe";
63 var $rawheaders = array(); // array of raw headers to send
64 // $rawheaders["Content-type"]="text/html";
66 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
67 var $lastredirectaddr = ""; // contains address of last redirected address
68 var $offsiteok = true; // allows redirection off-site
69 var $maxframes = 0; // frame content depth maximum. 0 = disallow
70 var $expandlinks = true; // expand links to fully qualified URLs.
71 // this only applies to fetchlinks()
72 // submitlinks(), and submittext()
73 var $passcookies = true; // pass set cookies back through redirects
74 // NOTE: this currently does not respect
75 // dates, domains or paths.
77 var $user = ""; // user for http authentication
78 var $pass = ""; // password for http authentication
81 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
83 var $results = ""; // where the content is put
85 var $error = ""; // error messages sent here
86 var $response_code = ""; // response code returned from server
87 var $headers = array(); // headers returned from server sent here
88 var $maxlength = 8192; // max return data length (body)
89 var $read_timeout = 0; // timeout on read operations, in seconds
90 // supported only since PHP 4 Beta 4
91 // set to 0 to disallow timeouts
92 var $timed_out = false; // if a read operation timed out
93 var $status = 0; // http request status
95 var $temp_dir = "/tmp"; // temporary directory that the webserver
96 // has permission to write to.
97 // under Windows, this should be C:\temp
99 var $curl_path = "/usr/local/bin/curl";
100 // Snoopy will use cURL for fetching
101 // SSL content if a full system path to
102 // the cURL binary is supplied here.
103 // set to false if you do not have
104 // cURL installed. See http://curl.haxx.se
105 // for details on installing cURL.
106 // Snoopy does *not* use the cURL
107 // library functions built into php,
108 // as these functions are not stable
109 // as of this Snoopy release.
111 /**** Private variables ****/
113 var $_maxlinelen = 4096; // max line length (headers)
115 var $_httpmethod = "GET"; // default http request method
116 var $_httpversion = "HTTP/1.0"; // default http request version
117 var $_submit_method = "POST"; // default submit method
118 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
119 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
120 var $_redirectaddr = false; // will be set if page fetched is a redirect
121 var $_redirectdepth = 0; // increments on an http redirect
122 var $_frameurls = array(); // frame src urls
123 var $_framedepth = 0; // increments on frame depth
125 var $_isproxy = false; // set if using a proxy server
126 var $_fp_timeout = 30; // timeout for socket connection
128 /*======================================================================*\
130 Purpose: fetch the contents of a web page
131 (and possibly other protocols in the
132 future like ftp, nntp, gopher, etc.)
133 Input: $URI the location of the page to fetch
134 Output: $this->results the output text from the fetch
135 \*======================================================================*/
140 //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
141 $URI_PARTS = parse_url($URI);
142 if (!empty($URI_PARTS["user"]))
143 $this->user = $URI_PARTS["user"];
144 if (!empty($URI_PARTS["pass"]))
145 $this->pass = $URI_PARTS["pass"];
146 if (empty($URI_PARTS["query"]))
147 $URI_PARTS["query"] = '';
148 if (empty($URI_PARTS["path"]))
149 $URI_PARTS["path"] = '';
151 switch(strtolower($URI_PARTS["scheme"]))
154 $this->host = $URI_PARTS["host"];
155 if(!empty($URI_PARTS["port"]))
156 $this->port = $URI_PARTS["port"];
157 if($this->_connect($fp))
161 // using proxy, send entire URI
162 $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
166 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
167 // no proxy, send only the path
168 $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
171 $this->_disconnect($fp);
173 if($this->_redirectaddr)
175 /* url was redirected, check if we've hit the max depth */
176 if($this->maxredirs > $this->_redirectdepth)
178 // only follow redirect if it's on this site, or offsiteok is true
179 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
181 /* follow the redirect */
182 $this->_redirectdepth++;
183 $this->lastredirectaddr=$this->_redirectaddr;
184 $this->fetch($this->_redirectaddr);
189 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
191 $frameurls = $this->_frameurls;
192 $this->_frameurls = array();
194 while(list(,$frameurl) = each($frameurls))
196 if($this->_framedepth < $this->maxframes)
198 $this->fetch($frameurl);
199 $this->_framedepth++;
213 if(!$this->curl_path)
215 if(function_exists("is_executable"))
216 if (!is_executable($this->curl_path))
218 $this->host = $URI_PARTS["host"];
219 if(!empty($URI_PARTS["port"]))
220 $this->port = $URI_PARTS["port"];
223 // using proxy, send entire URI
224 $this->_httpsrequest($URI,$URI,$this->_httpmethod);
228 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
229 // no proxy, send only the path
230 $this->_httpsrequest($path, $URI, $this->_httpmethod);
233 if($this->_redirectaddr)
235 /* url was redirected, check if we've hit the max depth */
236 if($this->maxredirs > $this->_redirectdepth)
238 // only follow redirect if it's on this site, or offsiteok is true
239 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
241 /* follow the redirect */
242 $this->_redirectdepth++;
243 $this->lastredirectaddr=$this->_redirectaddr;
244 $this->fetch($this->_redirectaddr);
249 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
251 $frameurls = $this->_frameurls;
252 $this->_frameurls = array();
254 while(list(,$frameurl) = each($frameurls))
256 if($this->_framedepth < $this->maxframes)
258 $this->fetch($frameurl);
259 $this->_framedepth++;
268 // not a valid protocol
269 $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
276 /*======================================================================*\
278 Purpose: submit an http form
279 Input: $URI the location to post the data
280 $formvars the formvars to use.
281 format: $formvars["var"] = "val";
282 $formfiles an array of files to submit
283 format: $formfiles["var"] = "/dir/filename.ext";
284 Output: $this->results the text output from the post
285 \*======================================================================*/
// Post form data to $URI, then follow any redirect and fetch any frame URLs
// that the request methods recorded on the object.
//   $URI       location to post to
//   $formvars  form variables, passed to _prepare_post_body()
//   $formfiles files to upload, passed to _prepare_post_body()
// NOTE(review): this listing is lossy — the numeric prefixes are not
// contiguous, so braces, `case` labels, `else` keywords and return statements
// are not visible. The branch structure described below is inferred from the
// surviving comments; confirm against the complete file.
287 function submit($URI, $formvars="", $formfiles="")
291 $postdata = $this->_prepare_post_body($formvars, $formfiles);
293 $URI_PARTS = parse_url($URI);
// Credentials embedded in the URI replace $this->user / $this->pass.
294 if (!empty($URI_PARTS["user"]))
295 $this->user = $URI_PARTS["user"];
296 if (!empty($URI_PARTS["pass"]))
297 $this->pass = $URI_PARTS["pass"];
// Normalise missing query/path to '' so they can be concatenated below.
298 if (empty($URI_PARTS["query"]))
299 $URI_PARTS["query"] = '';
300 if (empty($URI_PARTS["path"]))
301 $URI_PARTS["path"] = '';
// NOTE(review): "scheme" is read without an isset/empty check, unlike the
// other URI parts above — a scheme-less $URI would hit an undefined index.
303 switch(strtolower($URI_PARTS["scheme"]))
// --- socket branch: _connect() + _httprequest() ---
306 $this->host = $URI_PARTS["host"];
307 if(!empty($URI_PARTS["port"]))
308 $this->port = $URI_PARTS["port"];
309 if($this->_connect($fp))
313 // using proxy, send entire URI
314 $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
318 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
319 // no proxy, send only the path
320 $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
323 $this->_disconnect($fp);
325 if($this->_redirectaddr)
327 /* url was redirected, check if we've hit the max depth */
328 if($this->maxredirs > $this->_redirectdepth)
// A redirect target that does not start with the request's scheme is
// expanded against scheme://host of the original request.
330 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
331 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
333 // only follow redirect if it's on this site, or offsiteok is true
334 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
336 /* follow the redirect */
337 $this->_redirectdepth++;
338 $this->lastredirectaddr=$this->_redirectaddr;
// A "?" after the first character of the target selects fetch() (GET);
// the submit() call below re-posts the same form data.
339 if( strpos( $this->_redirectaddr, "?" ) > 0 )
340 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
342 $this->submit($this->_redirectaddr,$formvars, $formfiles);
// Frame URLs are copied to a local and the property is cleared before
// fetching, so nested fetch() calls start with an empty list.
347 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
349 $frameurls = $this->_frameurls;
350 $this->_frameurls = array();
// NOTE(review): each() is deprecated since PHP 7.2 and removed in PHP 8.0;
// this loop needs foreach on current PHP versions.
352 while(list(,$frameurl) = each($frameurls))
354 if($this->_framedepth < $this->maxframes)
356 $this->fetch($frameurl);
357 $this->_framedepth++;
// --- cURL branch: requires $this->curl_path, uses _httpsrequest() ---
372 if(!$this->curl_path)
374 if(function_exists("is_executable"))
375 if (!is_executable($this->curl_path))
377 $this->host = $URI_PARTS["host"];
378 if(!empty($URI_PARTS["port"]))
379 $this->port = $URI_PARTS["port"];
382 // using proxy, send entire URI
383 $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
387 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
388 // no proxy, send only the path
389 $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
392 if($this->_redirectaddr)
394 /* url was redirected, check if we've hit the max depth */
395 if($this->maxredirs > $this->_redirectdepth)
397 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
398 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
400 // only follow redirect if it's on this site, or offsiteok is true
// NOTE(review): the same-site pattern is hard-coded to "http://". If this
// branch handles https (presumably, given _httpsrequest), a same-host
// https:// target cannot match and is followed only when offsiteok is true.
401 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
403 /* follow the redirect */
404 $this->_redirectdepth++;
405 $this->lastredirectaddr=$this->_redirectaddr;
406 if( strpos( $this->_redirectaddr, "?" ) > 0 )
407 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
409 $this->submit($this->_redirectaddr,$formvars, $formfiles);
414 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
416 $frameurls = $this->_frameurls;
417 $this->_frameurls = array();
// NOTE(review): each() again — removed in PHP 8.0.
419 while(list(,$frameurl) = each($frameurls))
421 if($this->_framedepth < $this->maxframes)
423 $this->fetch($frameurl);
424 $this->_framedepth++;
434 // not a valid protocol
// NOTE(review): the trailing '"\n' is single-quoted, so the message ends with
// a literal backslash and "n", not a newline.
435 $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
442 /*======================================================================*\
444 Purpose: fetch the links from a web page
445 Input: $URI where you are fetching from
446 Output: $this->results an array of the URLs
447 \*======================================================================*/
// Fetch $URI with fetch() and, when that call returns a true value, replace
// $this->results with the output of _striplinks(), optionally expanded with
// _expandlinks().
// NOTE(review): lossy listing — braces, the keyword separating the array and
// scalar cases (presumably `else`) and the return statements are not visible.
449 function fetchlinks($URI)
451 if ($this->fetch($URI))
// After a redirect, links are resolved against the final address.
453 if($this->lastredirectaddr)
454 $URI = $this->lastredirectaddr;
// $this->results is an array when the request methods collected framed
// content; each element is then stripped separately.
455 if(is_array($this->results))
457 for($x=0;$x<count($this->results);$x++)
458 $this->results[$x] = $this->_striplinks($this->results[$x]);
461 $this->results = $this->_striplinks($this->results);
463 if($this->expandlinks)
464 $this->results = $this->_expandlinks($this->results, $URI);
471 /*======================================================================*\
473 Purpose: fetch the form elements from a web page
474 Input: $URI where you are fetching from
475 Output: $this->results the resulting html form
476 \*======================================================================*/
// Fetch $URI with fetch() and, when that call returns a true value, replace
// $this->results with the output of _stripform().
// NOTE(review): lossy listing — braces, the keyword separating the array and
// scalar cases (presumably `else`) and the return statements are not visible.
478 function fetchform($URI)
481 if ($this->fetch($URI))
// Array results (framed content) are processed element by element.
484 if(is_array($this->results))
486 for($x=0;$x<count($this->results);$x++)
487 $this->results[$x] = $this->_stripform($this->results[$x]);
490 $this->results = $this->_stripform($this->results);
499 /*======================================================================*\
501 Purpose: fetch the text from a web page, stripping the links
502 Input: $URI where you are fetching from
503 Output: $this->results the text from the web page
504 \*======================================================================*/
// Fetch $URI with fetch() and, when that call returns a true value, replace
// $this->results with the output of _striptext().
// NOTE(review): lossy listing — braces, the keyword separating the array and
// scalar cases (presumably `else`) and the return statements are not visible.
506 function fetchtext($URI)
508 if($this->fetch($URI))
// Array results (framed content) are processed element by element.
510 if(is_array($this->results))
512 for($x=0;$x<count($this->results);$x++)
513 $this->results[$x] = $this->_striptext($this->results[$x]);
516 $this->results = $this->_striptext($this->results);
523 /*======================================================================*\
524 Function: submitlinks
525 Purpose: grab links from a form submission
526 Input: $URI where you are submitting from
527 Output: $this->results an array of the links from the post
528 \*======================================================================*/
// Submit the form with submit() and, when that call returns a true value,
// replace $this->results with the links found by _striplinks(), expanded with
// _expandlinks() when $this->expandlinks is set.
// NOTE(review): lossy listing — braces, the keyword separating the array and
// scalar cases (presumably `else`) and the return statements are not visible.
530 function submitlinks($URI, $formvars="", $formfiles="")
532 if($this->submit($URI,$formvars, $formfiles))
// After a redirect, links are resolved against the final address.
534 if($this->lastredirectaddr)
535 $URI = $this->lastredirectaddr;
// Array results (framed content): strip and expand each element in turn.
536 if(is_array($this->results))
538 for($x=0;$x<count($this->results);$x++)
540 $this->results[$x] = $this->_striplinks($this->results[$x]);
541 if($this->expandlinks)
542 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
// Scalar result: same two steps applied to the single document.
547 $this->results = $this->_striplinks($this->results);
548 if($this->expandlinks)
549 $this->results = $this->_expandlinks($this->results,$URI);
557 /*======================================================================*\
559 Purpose: grab text from a form submission
560 Input: $URI where you are submitting from
561 Output: $this->results the text from the web page
562 \*======================================================================*/
// Submit the form with submit() and, when that call returns a true value,
// replace $this->results with the text produced by _striptext(); the text is
// additionally passed through _expandlinks() when $this->expandlinks is set.
// NOTE(review): lossy listing — braces, the keyword separating the array and
// scalar cases (presumably `else`) and the return statements are not visible.
564 function submittext($URI, $formvars = "", $formfiles = "")
566 if($this->submit($URI,$formvars, $formfiles))
// After a redirect, the final address is used as the base for expansion.
568 if($this->lastredirectaddr)
569 $URI = $this->lastredirectaddr;
// Array results (framed content): process each element in turn.
570 if(is_array($this->results))
572 for($x=0;$x<count($this->results);$x++)
574 $this->results[$x] = $this->_striptext($this->results[$x]);
575 if($this->expandlinks)
576 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
// Scalar result: same two steps applied to the single document.
581 $this->results = $this->_striptext($this->results);
582 if($this->expandlinks)
583 $this->results = $this->_expandlinks($this->results,$URI);
593 /*======================================================================*\
594 Function: set_submit_multipart
595 Purpose: Set the form submission content type to
597 \*======================================================================*/
/**
 * Switch form submissions to the "multipart/form-data" content type.
 *
 * Only the private $_submit_type property is changed. submit() passes that
 * value to the request methods as the content type, and
 * _prepare_post_body() switches on it to choose the body encoding.
 *
 * Takes no arguments and returns nothing.
 */
function set_submit_multipart()
{
	$this->_submit_type = "multipart/form-data";
}
604 /*======================================================================*\
605 Function: set_submit_normal
606 Purpose: Set the form submission content type to
607 application/x-www-form-urlencoded
608 \*======================================================================*/
/**
 * Switch form submissions back to the default
 * "application/x-www-form-urlencoded" content type.
 *
 * Only the private $_submit_type property is changed. submit() passes that
 * value to the request methods as the content type, and
 * _prepare_post_body() switches on it to choose the body encoding.
 *
 * Takes no arguments and returns nothing.
 */
function set_submit_normal()
{
	$this->_submit_type = "application/x-www-form-urlencoded";
}
617 /*======================================================================*\
619 \*======================================================================*/
622 /*======================================================================*\
623 Function: _striplinks
624 Purpose: strip the hyperlinks from an html document
625 Input: $document document to strip.
626 Output: $match an array of the links
627 \*======================================================================*/
629 function _striplinks($document)
631 preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href=
632 ([\"\'])? # find single or double quote
633 (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching
634 # quote, otherwise match up to next space
635 'isx",$document,$links);
638 // catenate the non-empty matches from the conditional subpattern
640 while(list($key,$val) = each($links[2]))
646 while(list($key,$val) = each($links[3]))
656 /*======================================================================*\
658 Purpose: strip the form elements from an html document
659 Input: $document document to strip.
660 Output: $match the matched form tags as one string, joined by CRLF
661 \*======================================================================*/
// Collect the form-related markup of an HTML document.
//   $document  HTML text to scan
// The pattern matches opening and closing FORM, INPUT, SELECT, TEXTAREA and
// OPTION tags case-insensitively (modifiers: U = ungreedy, s = dot matches
// newline, i = caseless); for OPTION tags the conditional subpattern also
// consumes the text up to the next option/select tag.
// NOTE(review): the return statement is not visible in this lossy listing;
// the header comment names $match as the output.
663 function _stripform($document)
665 preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
// $elements[0] holds the full-pattern matches; they become one CRLF-joined string.
667 // catenate the matches
668 $match = implode("\r\n",$elements[0]);
676 /*======================================================================*\
678 Purpose: strip the text from an html document
679 Input: $document document to strip.
680 Output: $text the resulting text
681 \*======================================================================*/
683 function _striptext($document)
686 // I didn't use preg eval (//e) since that is only available in PHP 4.0.
687 // so, list your entities one by one here. I included some of the
690 $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
691 "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
692 "'([\r\n])[\s]+'", // strip out white space
693 "'&(quot|#34|#034|#x22);'i", // replace html entities
694 "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
695 "'&(lt|#60|#060|#x3c);'i",
696 "'&(gt|#62|#062|#x3e);'i",
697 "'&(nbsp|#160|#xa0);'i",
704 "'&(#39|#039|#x27);'",
705 "'&(euro|#8364);'i", // europe
706 "'&a(uml|UML);'", // german
714 $replace = array( "",
739 $text = preg_replace($search,$replace,$document);
744 /*======================================================================*\
745 Function: _expandlinks
746 Purpose: expand each link into a fully qualified URL
747 Input: $links the links to qualify
748 $URI the full URI to get the base from
749 Output: $expandedLinks the expanded links
750 \*======================================================================*/
752 function _expandlinks($links,$URI)
755 preg_match("/^[^\?]+/",$URI,$match);
757 $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
758 $match = preg_replace("|/$|","",$match);
759 $match_part = parse_url($match);
761 $match_part["scheme"]."://".$match_part["host"];
763 $search = array( "|^http://".preg_quote($this->host)."|i",
765 "|^(?!http://)(?!mailto:)|i",
770 $replace = array( "",
777 $expandedLinks = preg_replace($search,$replace,$links);
779 return $expandedLinks;
782 /*======================================================================*\
783 Function: _httprequest
784 Purpose: go get the http data from the server
785 Input: $url the url to fetch
786 $fp the current open file pointer
788 $body body contents to send if any (POST)
790 \*======================================================================*/
// Send one HTTP request over the already-open socket $fp and parse the reply.
//   $url           request target written on the request line
//   $fp            open socket/stream
//   $URI           full URI of the request (used for scheme and link expansion)
//   $http_method   request method written on the request line
//   $content_type  optional Content-type header value
//   $body          optional request body
// Sets $this->headers, $this->response_code, $this->status,
// $this->_redirectaddr, $this->_frameurls and $this->results.
// NOTE(review): lossy listing — braces, several guards, the body-assembly
// loop and the return statements are not visible; confirm against the
// complete file before relying on the control flow.
792 function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
794 $cookie_headers = '';
// NOTE(review): the statement controlled by this condition (listing line 796)
// is missing from the listing.
795 if($this->passcookies && $this->_redirectaddr)
798 $URI_PARTS = parse_url($URI);
// Request line, then one header per configured property.
801 $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
802 if(!empty($this->agent))
803 $headers .= "User-Agent: ".$this->agent."\r\n";
// A caller-supplied raw "Host" header suppresses the generated one; the port
// is appended only when it is not 80.
804 if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
805 $headers .= "Host: ".$this->host;
806 if(!empty($this->port) && $this->port != 80)
807 $headers .= ":".$this->port;
810 if(!empty($this->accept))
811 $headers .= "Accept: ".$this->accept."\r\n";
812 if(!empty($this->referer))
813 $headers .= "Referer: ".$this->referer."\r\n";
814 if(!empty($this->cookies))
816 if(!is_array($this->cookies))
817 $this->cookies = (array)$this->cookies;
819 reset($this->cookies);
// Cookies are emitted as "name=urlencoded-value; " pairs; substr(..., 0, -2)
// drops the trailing "; ".
820 if ( count($this->cookies) > 0 ) {
821 $cookie_headers .= 'Cookie: ';
822 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
823 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
825 $headers .= substr($cookie_headers,0,-2) . "\r\n";
828 if(!empty($this->rawheaders))
830 if(!is_array($this->rawheaders))
831 $this->rawheaders = (array)$this->rawheaders;
// NOTE(review): each() is deprecated since PHP 7.2 and removed in PHP 8.0.
// It also advances the array's internal pointer without a reset(), so a
// second request on the same object would see no raw headers — verify.
832 while(list($headerKey,$headerVal) = each($this->rawheaders))
833 $headers .= $headerKey.": ".$headerVal."\r\n";
835 if(!empty($content_type)) {
836 $headers .= "Content-type: $content_type";
837 if ($content_type == "multipart/form-data")
838 $headers .= "; boundary=".$this->_mime_boundary;
// NOTE(review): any guard on this line (listing line 841) is not visible.
842 $headers .= "Content-length: ".strlen($body)."\r\n";
843 if(!empty($this->user) || !empty($this->pass))
844 $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
846 //add proxy auth headers
847 if(!empty($this->proxy_user))
848 $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
853 // set the read timeout if needed
854 if ($this->read_timeout > 0)
855 socket_set_timeout($fp, $this->read_timeout);
856 $this->timed_out = false;
// Headers and body go out in a single write.
858 fwrite($fp,$headers.$body,strlen($headers.$body));
860 $this->_redirectaddr = false;
// NOTE(review): unset() removes the property; it is re-created by the
// "$this->headers[] =" append below only if a header line is read, so
// later readers must tolerate it being undefined.
861 unset($this->headers);
// Read response header lines until the blank "\r\n" separator.
863 while($currentHeader = fgets($fp,$this->_maxlinelen))
865 if ($this->read_timeout > 0 && $this->_check_timeout($fp))
871 if($currentHeader == "\r\n")
874 // if a header begins with Location: or URI:, set the redirect
875 if(preg_match("/^(Location:|URI:)/i",$currentHeader))
877 // get URL portion of the redirect
878 preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
879 // look for :// in the Location header to see if hostname is included
880 if(!preg_match("|\:\/\/|",$matches[2]))
882 // no host in the path, so prepend
883 $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
884 // eliminate double slash
885 if(!preg_match("|^/|",$matches[2]))
886 $this->_redirectaddr .= "/".$matches[2];
888 $this->_redirectaddr .= $matches[2];
891 $this->_redirectaddr = $matches[2];
// Status line: keep the whole line as response_code and the token after the
// protocol version as status.
894 if(preg_match("|^HTTP/|",$currentHeader))
896 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
898 $this->status= $status[1];
900 $this->response_code = $currentHeader;
903 $this->headers[] = $currentHeader;
// Body read: chunks of at most $this->maxlength bytes.
// NOTE(review): the loop around this read and the code that accumulates
// $results are not visible in this listing.
908 $_data = fread($fp, $this->maxlength);
909 if (strlen($_data) == 0) {
915 if ($this->read_timeout > 0 && $this->_check_timeout($fp))
921 // check if there is a redirect meta tag
923 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
926 $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
929 // have we hit our frame depth and is there frame src to fetch?
930 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
// Framed page: results becomes an array and each frame src is queued,
// expanded against scheme://host.
932 $this->results[] = $results;
933 for($x=0; $x<count($match[1]); $x++)
934 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
936 // have we already fetched framed content?
937 elseif(is_array($this->results))
938 $this->results[] = $results;
941 $this->results = $results;
946 /*======================================================================*\
947 Function: _httpsrequest
948 Purpose: go get the https data from the server using curl
949 Input: $url the url to fetch
951 $body body contents to send if any (POST)
953 \*======================================================================*/
// Perform a request by running the external cURL binary ($this->curl_path)
// through exec(), then parse the header dump file and the captured output.
//   $url           request target (used only by the commented-out request line)
//   $URI           full URI handed to cURL
//   $http_method   request method (used only by the commented-out request line)
//   $content_type  optional Content-type header value
//   $body          optional request body, sent with cURL's -d option
// Sets $this->headers, $this->response_code, $this->_redirectaddr,
// $this->_frameurls, $this->results and, on failure, $this->error.
// NOTE(review): lossy listing — braces, several guards and the return
// statements are not visible; confirm against the complete file.
955 function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
// NOTE(review): the statement controlled by this condition (listing line 958)
// is missing from the listing.
957 if($this->passcookies && $this->_redirectaddr)
962 $URI_PARTS = parse_url($URI);
965 // GET ... header not needed for curl
966 //$headers[] = $http_method." ".$url." ".$this->_httpversion;
967 if(!empty($this->agent))
968 $headers[] = "User-Agent: ".$this->agent;
// Unlike _httprequest(), the port is appended whenever it is non-empty and a
// raw "Host" header is not checked for.
969 if(!empty($this->host))
970 if(!empty($this->port))
971 $headers[] = "Host: ".$this->host.":".$this->port;
973 $headers[] = "Host: ".$this->host;
974 if(!empty($this->accept))
975 $headers[] = "Accept: ".$this->accept;
976 if(!empty($this->referer))
977 $headers[] = "Referer: ".$this->referer;
978 if(!empty($this->cookies))
980 if(!is_array($this->cookies))
981 $this->cookies = (array)$this->cookies;
983 reset($this->cookies);
// Cookies are emitted as "name=urlencoded-value; " pairs; substr(..., 0, -2)
// drops the trailing "; ".
984 if ( count($this->cookies) > 0 ) {
985 $cookie_str = 'Cookie: ';
986 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
987 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
989 $headers[] = substr($cookie_str,0,-2);
992 if(!empty($this->rawheaders))
994 if(!is_array($this->rawheaders))
995 $this->rawheaders = (array)$this->rawheaders;
// NOTE(review): each() is deprecated since PHP 7.2 and removed in PHP 8.0.
996 while(list($headerKey,$headerVal) = each($this->rawheaders))
997 $headers[] = $headerKey.": ".$headerVal;
999 if(!empty($content_type)) {
1000 if ($content_type == "multipart/form-data")
1001 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
1003 $headers[] = "Content-type: $content_type";
1006 $headers[] = "Content-length: ".strlen($body);
// NOTE(review): the scheme is spelled "BASIC" here but "Basic" in
// _httprequest(); align the two.
1007 if(!empty($this->user) || !empty($this->pass))
1008 $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
// Each header becomes a -H "..." argument; only double quotes are replaced
// (with spaces) before the value is embedded in the command line.
// NOTE(review): no initialisation of $cmdline_params is visible in this
// listing before the first ".=".
1010 for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1011 $safer_header = strtr( $headers[$curr_header], "\"", " " );
1012 $cmdline_params .= " -H \"".$safer_header."\"";
// NOTE(review, security): $body is interpolated into a double-quoted shell
// argument without any quote replacement; the only protection is the
// escapeshellcmd() call on the whole command below. escapeshellarg() per
// argument is the documented way to quote a single argument — review for
// command/argument injection with untrusted form data.
1016 $cmdline_params .= " -d \"$body\"";
1018 if($this->read_timeout > 0)
1019 $cmdline_params .= " -m ".$this->read_timeout;
// NOTE(review): $temp_dir is a local variable that is never assigned in the
// visible code; the class declares a $temp_dir property, so
// $this->temp_dir is presumably what was intended. As written tempnam()
// receives an undefined value and falls back to its default directory.
1021 $headerfile = tempnam($temp_dir, "sno");
1023 $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
// cURL writes response headers to $headerfile (-D); its stdout lines land in
// $results and its exit status in $return.
1024 exec(escapeshellcmd($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\""),$results,$return);
1028 $this->error = "Error: cURL could not retrieve the document, error $return.";
// exec() returns output as an array of lines without line endings; rejoin.
1033 $results = implode("\r\n",$results);
1035 $result_headers = file("$headerfile");
1037 $this->_redirectaddr = false;
// NOTE(review): unset() removes the property; it is re-created by the
// "$this->headers[] =" append below only if the header file has lines.
1038 unset($this->headers);
1040 for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1043 // if a header begins with Location: or URI:, set the redirect
1044 if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1046 // get URL portion of the redirect
// NOTE(review): this pattern does not agree with the test above: it has no
// /i flag, and for "Location: " the trailing space is part of the literal,
// so \s+ demands a second whitespace character. An ordinary
// "Location: http://..." line therefore does not match and $matches[2] is
// undefined below. _httprequest() uses "/^(Location:|URI:)[ ]+(.*)/i".
1047 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1048 // look for :// in the Location header to see if hostname is included
1049 if(!preg_match("|\:\/\/|",$matches[2]))
1051 // no host in the path, so prepend
1052 $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1053 // eliminate double slash
1054 if(!preg_match("|^/|",$matches[2]))
1055 $this->_redirectaddr .= "/".$matches[2];
1057 $this->_redirectaddr .= $matches[2];
1060 $this->_redirectaddr = $matches[2];
// Unlike _httprequest(), $this->status is not set in the visible lines here.
1063 if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1064 $this->response_code = $result_headers[$currentHeader];
1066 $this->headers[] = $result_headers[$currentHeader];
1069 // check if there is a redirect meta tag
1071 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1073 $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1076 // have we hit our frame depth and is there frame src to fetch?
1077 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
// Framed page: results becomes an array and each frame src is queued,
// expanded against scheme://host.
1079 $this->results[] = $results;
1080 for($x=0; $x<count($match[1]); $x++)
1081 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1083 // have we already fetched framed content?
1084 elseif(is_array($this->results))
1085 $this->results[] = $results;
1086 // no framed content
1088 $this->results = $results;
// Remove the temporary header dump created by tempnam() above.
1090 unlink("$headerfile");
1095 /*======================================================================*\
1096 Function: setcookies()
1097 Purpose: set cookies for a redirection
1098 \*======================================================================*/
/**
 * Copy cookies announced by the last response into $this->cookies.
 *
 * Scans $this->headers for lines starting with "Set-Cookie:" (matched
 * case-insensitively) and stores each name => value pair, with the value
 * URL-decoded. Only the text up to the first ";" is taken as the value, so
 * cookie attributes such as path, domain or expiry are ignored.
 *
 * Takes no arguments and returns nothing.
 */
function setcookies()
{
	// _httprequest() and _httpsrequest() unset($this->headers) before they
	// parse a response, so the property can be missing or empty here.
	// count() on such a value warns on PHP 7.2+ and throws on PHP 8.
	if (empty($this->headers) || !is_array($this->headers))
		return;

	foreach ($this->headers as $header)
	{
		if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1110 /*======================================================================*\
1111 Function: _check_timeout
1112 Purpose: checks whether timeout has occurred
1113 Input: $fp file pointer
1114 \*======================================================================*/
// Check whether the last read on $fp timed out and record it on the object.
//   $fp  open socket/stream
// Only consulted when $this->read_timeout is positive; sets $this->timed_out
// to true when the stream status reports "timed_out".
// NOTE(review): the return statements are not visible in this lossy listing;
// callers use the result as a boolean condition.
1116 function _check_timeout($fp)
1118 if ($this->read_timeout > 0) {
// socket_get_status() is PHP's alias of stream_get_meta_data().
1119 $fp_status = socket_get_status($fp);
1120 if ($fp_status["timed_out"]) {
1121 $this->timed_out = true;
1128 /*======================================================================*\
1130 Purpose: make a socket connection
1131 Input: $fp file pointer
1132 \*======================================================================*/
// Open the socket for a request and hand it back through the by-reference
// parameter $fp.
// When both proxy_host and proxy_port are set, the proxy endpoint is used and
// $this->_isproxy is flagged; otherwise $this->host / $this->port are used.
// On failure $this->status receives $errno and $this->error a message.
// NOTE(review): lossy listing — the call that opens the socket, the `else`
// and `switch`/`case` lines selecting between the error messages, and the
// return statements are not visible; confirm against the complete file.
1134 function _connect(&$fp)
1136 if(!empty($this->proxy_host) && !empty($this->proxy_port))
1138 $this->_isproxy = true;
1140 $host = $this->proxy_host;
1141 $port = $this->proxy_port;
1145 $host = $this->host;
1146 $port = $this->port;
1159 // socket connection succeeded
1165 // socket connection failed
1166 $this->status = $errno;
// One of the following messages is stored; which $errno value selects which
// message is not visible here, apart from the codes quoted in the strings.
1170 $this->error="socket creation failed (-3)";
1172 $this->error="dns lookup failure (-4)";
1174 $this->error="connection refused or timed out (-5)";
1176 $this->error="connection failed (".$errno.")";
1181 /*======================================================================*\
1182 Function: _disconnect
1183 Purpose: disconnect a socket connection
1184 Input: $fp file pointer
1185 \*======================================================================*/
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer to close
 * @return bool result of fclose(); false when $fp is not an open resource
 */
function _disconnect($fp)
{
	// fclose() on a non-resource warns on PHP 5/7 and throws a TypeError on
	// PHP 8; report failure through the return value instead.
	if (!is_resource($fp))
		return false;

	return fclose($fp);
}
1193 /*======================================================================*\
1194 Function: _prepare_post_body
1195 Purpose: Prepare post body according to encoding type
1196 Input: $formvars - form variables
1197 $formfiles - form upload files
1199 \*======================================================================*/
1201 function _prepare_post_body($formvars, $formfiles)
1203 settype($formvars, "array");
1204 settype($formfiles, "array");
1207 if (count($formvars) == 0 && count($formfiles) == 0)
1210 switch ($this->_submit_type) {
1211 case "application/x-www-form-urlencoded":
1213 while(list($key,$val) = each($formvars)) {
1214 if (is_array($val) || is_object($val)) {
1215 while (list($cur_key, $cur_val) = each($val)) {
1216 $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1219 $postdata .= urlencode($key)."=".urlencode($val)."&";
1223 case "multipart/form-data":
1224 $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1227 while(list($key,$val) = each($formvars)) {
1228 if (is_array($val) || is_object($val)) {
1229 while (list($cur_key, $cur_val) = each($val)) {
1230 $postdata .= "--".$this->_mime_boundary."\r\n";
1231 $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1232 $postdata .= "$cur_val\r\n";
1235 $postdata .= "--".$this->_mime_boundary."\r\n";
1236 $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1237 $postdata .= "$val\r\n";
1242 while (list($field_name, $file_names) = each($formfiles)) {
1243 settype($file_names, "array");
1244 while (list(, $file_name) = each($file_names)) {
1245 if (!is_readable($file_name)) continue;
1247 $fp = fopen($file_name, "r");
1248 while (!feof($fp)) {
1249 $file_content .= fread($fp, filesize($file_name));
1252 $base_name = basename($file_name);
1254 $postdata .= "--".$this->_mime_boundary."\r\n";
1255 $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1256 $postdata .= "$file_content\r\n";
1259 $postdata .= "--".$this->_mime_boundary."--\r\n";