3 * Snoopy - the PHP net client
4 * @author Monte Ohrt <monte@ispi.net>
5 * @copyright 1999-2000 ispi, all rights reserved
7 * @license GNU Lesser GPL
8 * @link http://snoopy.sourceforge.net/
11 /*************************************************
13 Snoopy - the PHP net client
14 Author: Monte Ohrt <monte@ispi.net>
15 Copyright (c): 1999-2000 ispi, all rights reserved
18 * This library is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU Lesser General Public
20 * License as published by the Free Software Foundation; either
21 * version 2.1 of the License, or (at your option) any later version.
23 * This library is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * Lesser General Public License for more details.
28 * You should have received a copy of the GNU Lesser General Public
29 * License along with this library; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 You may contact the author of Snoopy by e-mail at:
41 The latest version of Snoopy can be obtained from:
42 http://snoopy.sourceforge.net/
44 *************************************************/
46 if ( !in_array('Snoopy', get_declared_classes() ) ) :
// NOTE(review): the integer that starts each line below looks like a line
// number carried over from a listing, not PHP; confirm before running.
49 /**** Public variables ****/
51 /* user definable vars */
53 var $host = "www.php.net"; // host name we are connecting to
54 var $port = 80; // port we are connecting to
55 var $proxy_host = ""; // proxy host to use
56 var $proxy_port = ""; // proxy port to use
57 var $proxy_user = ""; // proxy user to use
58 var $proxy_pass = ""; // proxy password to use
60 var $agent = "Snoopy v1.2.3"; // agent we masquerade as
61 var $referer = ""; // referer info to pass
62 var $cookies = array(); // array of cookies to pass
63 // $cookies["username"]="joe";
64 var $rawheaders = array(); // array of raw headers to send
65 // $rawheaders["Content-type"]="text/html";
67 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
68 var $lastredirectaddr = ""; // contains address of last redirected address
69 var $offsiteok = true; // allows redirection off-site
70 var $maxframes = 0; // frame content depth maximum. 0 = disallow
71 var $expandlinks = true; // expand links to fully qualified URLs.
72 // this only applies to fetchlinks()
73 // submitlinks(), and submittext()
74 var $passcookies = true; // pass set cookies back through redirects
75 // NOTE: this currently does not respect
76 // dates, domains or paths.
78 var $user = ""; // user for http authentication
79 var $pass = ""; // password for http authentication
// value of the Accept: request header; the request methods send it
// only when it is non-empty
82 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
// a string for a single document; the request methods append to it as an
// array (one entry per document) once frame sources are being collected
84 var $results = ""; // where the content is put
86 var $error = ""; // error messages sent here
87 var $response_code = ""; // response code returned from server
88 var $headers = array(); // headers returned from server sent here
// passed as the length argument of fread() when the response body is read
89 var $maxlength = 8192; // max return data length (body)
90 var $read_timeout = 0; // timeout on read operations, in seconds
91 // supported only since PHP 4 Beta 4
92 // set to 0 to disallow timeouts
93 var $timed_out = false; // if a read operation timed out
// taken from the "HTTP/x.y <code>" status line by _httprequest(), or set
// to the socket errno by _connect() when the connection fails
94 var $status = 0; // http request status
// NOTE(review): _httpsrequest() hands a local $temp_dir (not this property)
// to tempnam(), so this setting does not appear to be read by the visible code.
96 var $temp_dir = "/tmp"; // temporary directory that the webserver
97 // has permission to write to.
98 // under Windows, this should be C:\temp
// fetch()/submit() test this value (non-empty, is_executable) before an
// https request, and _httpsrequest() runs it through exec()
100 var $curl_path = "/usr/local/bin/curl";
101 // Snoopy will use cURL for fetching
102 // SSL content if a full system path to
103 // the cURL binary is supplied here.
104 // set to false if you do not have
105 // cURL installed. See http://curl.haxx.se
106 // for details on installing cURL.
107 // Snoopy does *not* use the cURL
108 // library functions built into php,
109 // as these functions are not stable
110 // as of this Snoopy release.
112 /**** Private variables ****/
114 var $_maxlinelen = 4096; // max line length (headers)
116 var $_httpmethod = "GET"; // default http request method
117 var $_httpversion = "HTTP/1.0"; // default http request version
118 var $_submit_method = "POST"; // default submit method
119 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
120 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
121 var $_redirectaddr = false; // will be set if page fetched is a redirect
122 var $_redirectdepth = 0; // increments on an http redirect
123 var $_frameurls = array(); // frame src urls
124 var $_framedepth = 0; // increments on frame depth
126 var $_isproxy = false; // set if using a proxy server
127 var $_fp_timeout = 30; // timeout for socket connection
/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page over http or https
                (and possibly other protocols in the
                future like ftp, nntp, gopher, etc.), then follow
                redirects and frame sources up to the configured depths
    Input:      $URI    the location of the page to fetch
    Output:     $this->results  the output text from the fetch
    Returns:    true once a request has been issued; false when the
                scheme is not http/https, the socket cannot be opened,
                or (https) $this->curl_path is empty or not executable
\*======================================================================*/
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();   // parse_url() yields false for badly malformed URLs

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $path   = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            // using proxy, send entire URI; no proxy, send only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // using proxy, send entire URI; no proxy, send only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by a delimiter so that
        // "host.evil.tld" does not pass as on-site.
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
/*======================================================================*\
    Function:   submit
    Purpose:    submit an http form
    Input:      $URI        the location to post the data
                $formvars   the formvars to use.
                    format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                    format: $formfiles["var"] = "/dir/filename.ext";
    Output:     $this->results  the text output from the post
    Returns:    true once a request has been issued; false when the
                scheme is not http/https, the socket cannot be opened,
                or (https) $this->curl_path is empty or not executable
\*======================================================================*/
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();   // parse_url() yields false for badly malformed URLs

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $path   = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            // using proxy, send entire URI; no proxy, send only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // using proxy, send entire URI; no proxy, send only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // a target that does not start with this request's scheme is
        // treated as relative and resolved against the current host
        if (!preg_match("|^".preg_quote($scheme, "|")."://|", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by a delimiter so that
        // "host.evil.tld" does not pass as on-site.
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
/*======================================================================*\
    Function:   fetchlinks
    Purpose:    fetch the links from a web page
    Input:      $URI    where you are fetching from
    Output:     $this->results  an array of the URLs (one such array
                per document when framed content was fetched)
    Returns:    true if fetch() succeeded, false otherwise
\*======================================================================*/
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    // links are relative to the final address after any redirect
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // Expand per document, as submitlinks() does: passing the whole
        // array of link arrays to _expandlinks() would hand preg_replace()
        // nested arrays.
        foreach (array_keys($this->results) as $x)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
/*======================================================================*\
    Function:   fetchform
    Purpose:    fetch the form elements from a web page
    Input:      $URI    where you are fetching from
    Output:     $this->results  the resulting html form
    Returns:    true if fetch() succeeded, false otherwise
\*======================================================================*/
function fetchform($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (is_array($this->results))
    {
        // framed content: reduce every fetched document separately
        foreach (array_keys($this->results) as $x)
            $this->results[$x] = $this->_stripform($this->results[$x]);
    }
    else
        $this->results = $this->_stripform($this->results);

    return true;
}
/*======================================================================*\
    Function:   fetchtext
    Purpose:    fetch the text from a web page, stripping the links
    Input:      $URI    where you are fetching from
    Output:     $this->results  the text from the web page
    Returns:    true if fetch() succeeded, false otherwise
\*======================================================================*/
function fetchtext($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (is_array($this->results))
    {
        // framed content: reduce every fetched document separately
        foreach (array_keys($this->results) as $x)
            $this->results[$x] = $this->_striptext($this->results[$x]);
    }
    else
        $this->results = $this->_striptext($this->results);

    return true;
}
/*======================================================================*\
    Function:   submitlinks
    Purpose:    grab links from a form submission
    Input:      $URI        where you are submitting from
                $formvars   form variables, as for submit()
                $formfiles  files to upload, as for submit()
    Output:     $this->results  an array of the links from the post
    Returns:    true if submit() succeeded, false otherwise
\*======================================================================*/
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // links are relative to the final address after any redirect
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        foreach (array_keys($this->results) as $x)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
/*======================================================================*\
    Function:   submittext
    Purpose:    grab text from a form submission
    Input:      $URI        where you are submitting from
                $formvars   form variables, as for submit()
                $formfiles  files to upload, as for submit()
    Output:     $this->results  the text from the web page
    Returns:    true if submit() succeeded, false otherwise
\*======================================================================*/
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        foreach (array_keys($this->results) as $x)
        {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
/*======================================================================*\
    Function:   set_submit_multipart
    Purpose:    Set the form submission content type to
                multipart/form-data
\*======================================================================*/
function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
}
/*======================================================================*\
    Function:   set_submit_normal
    Purpose:    Set the form submission content type to
                application/x-www-form-urlencoded
\*======================================================================*/
function set_submit_normal()
{
    $this->_submit_type = "application/x-www-form-urlencoded";
}
618 /*======================================================================*\
620 \*======================================================================*/
/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:      $document   document to strip.
    Output:     $match      an array of the links: quoted href values
                            first, then unquoted ones; an empty array
                            when the document has no links
\*======================================================================*/
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    $match = array();
    foreach (array(2, 3) as $group)
    {
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
/*======================================================================*\
    Function:   _stripform
    Purpose:    strip the form elements from an html document
    Input:      $document   document to strip.
    Output:     $match      a string holding the matched form, input,
                            select, textarea and option elements,
                            joined with "\r\n"
\*======================================================================*/
function _stripform($document)
{
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

    // catenate the matches
    $match = implode("\r\n",$elements[0]);

    // return the form elements
    return $match;
}
677 /*======================================================================*\
679 Purpose: strip the text from an html document
680 Input: $document document to strip.
681 Output: $text the resulting text
682 \*======================================================================*/
684 function _striptext($document)
// Applies one preg_replace() call to $document using the parallel
// $search / $replace tables built below.
// NOTE(review): only part of $search and just the first $replace entry are
// visible in this excerpt; confirm that both tables have the same number of
// entries in the same order before changing either.
687 // I didn't use preg eval (//e) since that is only available in PHP 4.0.
688 // so, list your entities one by one here. I included some of the
691 $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
692 "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
693 "'([\r\n])[\s]+'", // strip out white space
694 "'&(quot|#34|#034|#x22);'i", // replace html entities
695 "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
696 "'&(lt|#60|#060|#x3c);'i",
697 "'&(gt|#62|#062|#x3e);'i",
698 "'&(nbsp|#160|#xa0);'i",
// unlike the entries above, the next pattern has no /i flag, so hex
// digits in upper case (&#X27;) are not matched by it
705 "'&(#39|#039|#x27);'",
706 "'&(euro|#8364);'i", // europe
707 "'&a(uml|UML);'", // german
// replacement for the first $search pattern: script blocks are removed
715 $replace = array( "",
740 $text = preg_replace($search,$replace,$document);
/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (string or array)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/
function _expandlinks($links,$URI)
{
    // base directory of $URI: drop the query string, then a trailing
    // "name.ext" segment, then a trailing slash
    // NOTE(review): for a bare two-label host such as "http://example.com"
    // the "name.ext" rule also matches the host itself - confirm callers
    // always pass a URI with a path.
    preg_match("/^[^\?]+/",$URI,$match);

    $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
    $match = preg_replace("|/$|","",$match);
    $match_part = parse_url($match);
    $match_root = $match_part["scheme"]."://".$match_part["host"];

    // applied in order: make same-host links root-relative, re-root
    // links starting with "/", prefix remaining relative links with the
    // base directory, then collapse "/./" and "/dir/../"
    $search = array(    "|^http://".preg_quote($this->host)."|i",
                        "|^(\/)|i",
                        // https:// links are absolute too and must not
                        // get the base directory prepended
                        "|^(?!https?://)(?!mailto:)|i",
                        "|/\./|",
                        "|/[^\/]+/\.\./|"
                    );

    $replace = array(   "",
                        $match_root."/",
                        $match."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
/*======================================================================*\
    Function:   _httprequest
    Purpose:    go get the http data from the server
    Input:      $url            the url to fetch (request target)
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    the request method
                $content_type   Content-type header value, if any
                $body           body contents to send if any (POST)
    Output:     $this->headers, $this->response_code, $this->status,
                $this->results; $this->_redirectaddr and
                $this->_frameurls when a redirect or frames are found
    Returns:    true on success, false when a read timed out
                ($this->status is then -100)
\*======================================================================*/
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    $cookie_headers = '';
    // on a redirect, carry over cookies set by the previous response
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if (!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if (!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if (!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if (!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if (count($this->cookies) > 0) {
            $cookie_headers .= 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are sent on every request, including redirects
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if (!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        stream_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // keep the property an array so later count()/foreach calls are safe
    $this->headers = array();

    while ($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        // blank line: end of the header section
        if ($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // One match both detects the header and extracts its value, so
        // the value is never read from a failed match.
        if (preg_match('/^(?:Location|URI):\s*(.*)/i', chop($currentHeader), $matches) && $matches[1] !== '')
        {
            $target = $matches[1];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if (preg_match("|^HTTP/|",$currentHeader))
        {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of $this->maxlength until nothing is left
    $results = '';
    while (true)
    {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0)
            break;
        $results .= $_data;
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status = -100;
        return false;
    }

    // check if there is a a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // appending with [] to a string is a fatal error on current PHP
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server using curl
    Input:      $url            the url to fetch (unused: curl is
                                given the full $URI)
                $URI            the full URI
                $http_method    the request method (not passed to curl)
                $content_type   Content-type header value, if any
                $body           body contents to send if any (POST)
    Output:     $this->headers, $this->response_code, $this->results;
                $this->_redirectaddr and $this->_frameurls when a
                redirect or frames are found
    Returns:    true on success; false (with $this->error set) when
                curl exits with a non-zero status
\*======================================================================*/
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // on a redirect, carry over cookies set by the previous response
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);

    // GET ... header not needed for curl
    if (!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if (!empty($this->host))
    {
        if (!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if (!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if (!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if (count($this->cookies) > 0) {
            $cookie_str = 'Cookie: ';
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if (!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if (!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Every value that reaches the shell is passed through
    // escapeshellarg(): URI, headers and POST body can all carry
    // caller- or server-supplied text.
    $cmdline_params = '';
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if (!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if ($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // curl dumps the response headers into this file (-D)
    $headerfile = tempnam($this->temp_dir, "sno");

    exec(escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if ($return)
    {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        // do not leave the header dump behind on failure
        if (file_exists($headerfile))
            unlink($headerfile);
        return false;
    }

    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    if (!is_array($result_headers))
        $result_headers = array();

    $this->_redirectaddr = false;
    // keep the property an array so later count()/foreach calls are safe
    $this->headers = array();

    foreach ($result_headers as $header_line)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // One match both detects the header and extracts its value, so
        // the value is never read from a failed match.
        if (preg_match('/^(?:Location|URI):\s*(.*)/i', chop($header_line), $matches) && $matches[1] !== '')
        {
            $target = $matches[1];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if (preg_match("|^HTTP/|",$header_line))
            $this->response_code = $header_line;

        $this->headers[] = $header_line;
    }

    // check if there is a a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // appending with [] to a string is a fatal error on current PHP
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    unlink($headerfile);

    return true;
}
/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection: copy every Set-Cookie
                name/value pair found in $this->headers into
                $this->cookies (attributes after the first ";" are
                ignored)
\*======================================================================*/
function setcookies()
{
    // the request methods unset $this->headers before reading a
    // response, so it is not guaranteed to be an array here
    if (!is_array($this->headers))
        return;

    foreach ($this->headers as $header)
    {
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
            $this->cookies[$match[1]] = urldecode($match[2]);
    }
}
/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp file pointer
    Output:     true (and $this->timed_out set) when a read timeout
                is configured and the stream reports timed_out;
                false otherwise
\*======================================================================*/
function _check_timeout($fp)
{
    if ($this->read_timeout > 0) {
        // stream_get_meta_data() is the canonical name of socket_get_status()
        $fp_status = stream_get_meta_data($fp);
        if ($fp_status["timed_out"]) {
            $this->timed_out = true;
            return true;
        }
    }
    return false;
}
/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection, to the proxy when both
                proxy_host and proxy_port are set, otherwise to
                $this->host:$this->port
    Input:      $fp file pointer (by reference; receives the socket)
    Output:     true on success; false on failure, with $this->status
                set to the socket errno and $this->error to a message
\*======================================================================*/
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno)
    {
        // each case must break: without it every failure falls through
        // to the generic message
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
    }
    return false;
}
/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp file pointer
    Output:     the result of fclose(): true on success, false on failure
\*======================================================================*/
function _disconnect($fp)
{
    return(fclose($fp));
}
/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
                ($this->_submit_type); for multipart/form-data a fresh
                $this->_mime_boundary is generated
    Input:      $formvars  - form variables
                $formfiles - form upload files
    Output:     post body (empty string when there is nothing to send)
\*======================================================================*/
function _prepare_post_body($formvars, $formfiles)
{
    // The callers default both arguments to "", which settype() would
    // turn into array(0 => "") and post as a bogus "0=" field.
    if ($formvars === "" || $formvars === null)
        $formvars = array();
    if ($formfiles === "" || $formfiles === null)
        $formfiles = array();
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // multi-valued field: repeat as key[]=value
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // Read each file on its own: appending to one shared
                    // buffer would repeat earlier files inside later
                    // parts, and a chunked read sized by filesize()
                    // breaks on empty files.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}