]> scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/class-snoopy.php
Wordpress 3.0
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2
3 /**
4  * Deprecated. Use WP_HTTP (http.php, class-http.php) instead.
5  */
6 _deprecated_file( basename( __FILE__ ), '3.0', WPINC . '/http.php' );
7
8 if ( !class_exists( 'Snoopy' ) ) :
9 /*************************************************
10
11 Snoopy - the PHP net client
12 Author: Monte Ohrt <monte@ispi.net>
13 Copyright (c): 1999-2008 New Digital Group, all rights reserved
14 Version: 1.2.4
15
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29
30 You may contact the author of Snoopy by e-mail at:
31 monte@ohrt.com
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.net/
35
36 *************************************************/
37
38 class Snoopy
39 {
40         /**** Public variables ****/
41
42         /* user definable vars */
43
44         var $host                       =       "www.php.net";          // host name we are connecting to
45         var $port                       =       80;                                     // port we are connecting to
46         var $proxy_host         =       "";                                     // proxy host to use
47         var $proxy_port         =       "";                                     // proxy port to use
48         var $proxy_user         =       "";                                     // proxy user to use
49         var $proxy_pass         =       "";                                     // proxy password to use
50
51         var $agent                      =       "Snoopy v1.2.4";        // agent we masquerade as
52         var     $referer                =       "";                                     // referer info to pass
53         var $cookies            =       array();                        // array of cookies to pass
54                                                                                                 // $cookies["username"]="joe";
55         var     $rawheaders             =       array();                        // array of raw headers to send
56                                                                                                 // $rawheaders["Content-type"]="text/html";
57
58         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
59         var $lastredirectaddr   =       "";                             // contains address of last redirected address
60         var     $offsiteok              =       true;                           // allows redirection off-site
61         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
62         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
63                                                                                                 // this only applies to fetchlinks()
64                                                                                                 // submitlinks(), and submittext()
65         var $passcookies        =       true;                           // pass set cookies back through redirects
66                                                                                                 // NOTE: this currently does not respect
67                                                                                                 // dates, domains or paths.
68
69         var     $user                   =       "";                                     // user for http authentication
70         var     $pass                   =       "";                                     // password for http authentication
71
72         // http accept types
73         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
74
75         var $results            =       "";                                     // where the content is put
76
77         var $error                      =       "";                                     // error messages sent here
78         var     $response_code  =       "";                                     // response code returned from server
79         var     $headers                =       array();                        // headers returned from server sent here
80         var     $maxlength              =       500000;                         // max return data length (body)
81         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
82                                                                                                 // supported only since PHP 4 Beta 4
83                                                                                                 // set to 0 to disallow timeouts
84         var $timed_out          =       false;                          // if a read operation timed out
85         var     $status                 =       0;                                      // http request status
86
87         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
88                                                                                                 // has permission to write to.
89                                                                                                 // under Windows, this should be C:\temp
90
91         var     $curl_path              =       "/usr/local/bin/curl";
92                                                                                                 // Snoopy will use cURL for fetching
93                                                                                                 // SSL content if a full system path to
94                                                                                                 // the cURL binary is supplied here.
95                                                                                                 // set to false if you do not have
96                                                                                                 // cURL installed. See http://curl.haxx.se
97                                                                                                 // for details on installing cURL.
98                                                                                                 // Snoopy does *not* use the cURL
99                                                                                                 // library functions built into php,
100                                                                                                 // as these functions are not stable
101                                                                                                 // as of this Snoopy release.
102
103         /**** Private variables ****/
104
105         var     $_maxlinelen    =       4096;                           // max line length (headers)
106
107         var $_httpmethod        =       "GET";                          // default http request method
108         var $_httpversion       =       "HTTP/1.0";                     // default http request version
109         var $_submit_method     =       "POST";                         // default submit method
110         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
111         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
112         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
113         var $_redirectdepth     =       0;                                      // increments on an http redirect
114         var $_frameurls         =       array();                        // frame src urls
115         var $_framedepth        =       0;                                      // increments on frame depth
116
117         var $_isproxy           =       false;                          // set if using a proxy server
118         var $_fp_timeout        =       30;                                     // timeout for socket connection
119
120 /*======================================================================*\
121         Function:       fetch
122         Purpose:        fetch the contents of a web page
123                                 (and possibly other protocols in the
124                                 future like ftp, nntp, gopher, etc.)
125         Input:          $URI    the location of the page to fetch
126         Output:         $this->results  the output text from the fetch
127 \*======================================================================*/
128
129         function fetch($URI)
130         {
131
132                 //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
133                 $URI_PARTS = parse_url($URI);
134                 if (!empty($URI_PARTS["user"]))
135                         $this->user = $URI_PARTS["user"];
136                 if (!empty($URI_PARTS["pass"]))
137                         $this->pass = $URI_PARTS["pass"];
138                 if (empty($URI_PARTS["query"]))
139                         $URI_PARTS["query"] = '';
140                 if (empty($URI_PARTS["path"]))
141                         $URI_PARTS["path"] = '';
142
143                 switch(strtolower($URI_PARTS["scheme"]))
144                 {
145                         case "http":
146                                 $this->host = $URI_PARTS["host"];
147                                 if(!empty($URI_PARTS["port"]))
148                                         $this->port = $URI_PARTS["port"];
149                                 if($this->_connect($fp))
150                                 {
151                                         if($this->_isproxy)
152                                         {
153                                                 // using proxy, send entire URI
154                                                 $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
155                                         }
156                                         else
157                                         {
158                                                 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
159                                                 // no proxy, send only the path
160                                                 $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
161                                         }
162
163                                         $this->_disconnect($fp);
164
165                                         if($this->_redirectaddr)
166                                         {
167                                                 /* url was redirected, check if we've hit the max depth */
168                                                 if($this->maxredirs > $this->_redirectdepth)
169                                                 {
170                                                         // only follow redirect if it's on this site, or offsiteok is true
171                                                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
172                                                         {
173                                                                 /* follow the redirect */
174                                                                 $this->_redirectdepth++;
175                                                                 $this->lastredirectaddr=$this->_redirectaddr;
176                                                                 $this->fetch($this->_redirectaddr);
177                                                         }
178                                                 }
179                                         }
180
181                                         if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
182                                         {
183                                                 $frameurls = $this->_frameurls;
184                                                 $this->_frameurls = array();
185
186                                                 while(list(,$frameurl) = each($frameurls))
187                                                 {
188                                                         if($this->_framedepth < $this->maxframes)
189                                                         {
190                                                                 $this->fetch($frameurl);
191                                                                 $this->_framedepth++;
192                                                         }
193                                                         else
194                                                                 break;
195                                                 }
196                                         }
197                                 }
198                                 else
199                                 {
200                                         return false;
201                                 }
202                                 return true;
203                                 break;
204                         case "https":
205                                 if(!$this->curl_path)
206                                         return false;
207                                 if(function_exists("is_executable"))
208                                     if (!is_executable($this->curl_path))
209                                         return false;
210                                 $this->host = $URI_PARTS["host"];
211                                 if(!empty($URI_PARTS["port"]))
212                                         $this->port = $URI_PARTS["port"];
213                                 if($this->_isproxy)
214                                 {
215                                         // using proxy, send entire URI
216                                         $this->_httpsrequest($URI,$URI,$this->_httpmethod);
217                                 }
218                                 else
219                                 {
220                                         $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
221                                         // no proxy, send only the path
222                                         $this->_httpsrequest($path, $URI, $this->_httpmethod);
223                                 }
224
225                                 if($this->_redirectaddr)
226                                 {
227                                         /* url was redirected, check if we've hit the max depth */
228                                         if($this->maxredirs > $this->_redirectdepth)
229                                         {
230                                                 // only follow redirect if it's on this site, or offsiteok is true
231                                                 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
232                                                 {
233                                                         /* follow the redirect */
234                                                         $this->_redirectdepth++;
235                                                         $this->lastredirectaddr=$this->_redirectaddr;
236                                                         $this->fetch($this->_redirectaddr);
237                                                 }
238                                         }
239                                 }
240
241                                 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
242                                 {
243                                         $frameurls = $this->_frameurls;
244                                         $this->_frameurls = array();
245
246                                         while(list(,$frameurl) = each($frameurls))
247                                         {
248                                                 if($this->_framedepth < $this->maxframes)
249                                                 {
250                                                         $this->fetch($frameurl);
251                                                         $this->_framedepth++;
252                                                 }
253                                                 else
254                                                         break;
255                                         }
256                                 }
257                                 return true;
258                                 break;
259                         default:
260                                 // not a valid protocol
261                                 $this->error    =       'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
262                                 return false;
263                                 break;
264                 }
265                 return true;
266         }
267
268 /*======================================================================*\
269         Function:       submit
270         Purpose:        submit an http form
271         Input:          $URI    the location to post the data
272                                 $formvars       the formvars to use.
273                                         format: $formvars["var"] = "val";
274                                 $formfiles  an array of files to submit
275                                         format: $formfiles["var"] = "/dir/filename.ext";
276         Output:         $this->results  the text output from the post
277 \*======================================================================*/
278
279         function submit($URI, $formvars="", $formfiles="")
280         {
281                 unset($postdata);
282
283                 $postdata = $this->_prepare_post_body($formvars, $formfiles);
284
285                 $URI_PARTS = parse_url($URI);
286                 if (!empty($URI_PARTS["user"]))
287                         $this->user = $URI_PARTS["user"];
288                 if (!empty($URI_PARTS["pass"]))
289                         $this->pass = $URI_PARTS["pass"];
290                 if (empty($URI_PARTS["query"]))
291                         $URI_PARTS["query"] = '';
292                 if (empty($URI_PARTS["path"]))
293                         $URI_PARTS["path"] = '';
294
295                 switch(strtolower($URI_PARTS["scheme"]))
296                 {
297                         case "http":
298                                 $this->host = $URI_PARTS["host"];
299                                 if(!empty($URI_PARTS["port"]))
300                                         $this->port = $URI_PARTS["port"];
301                                 if($this->_connect($fp))
302                                 {
303                                         if($this->_isproxy)
304                                         {
305                                                 // using proxy, send entire URI
306                                                 $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
307                                         }
308                                         else
309                                         {
310                                                 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
311                                                 // no proxy, send only the path
312                                                 $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
313                                         }
314
315                                         $this->_disconnect($fp);
316
317                                         if($this->_redirectaddr)
318                                         {
319                                                 /* url was redirected, check if we've hit the max depth */
320                                                 if($this->maxredirs > $this->_redirectdepth)
321                                                 {
322                                                         if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
323                                                                 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
324
325                                                         // only follow redirect if it's on this site, or offsiteok is true
326                                                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
327                                                         {
328                                                                 /* follow the redirect */
329                                                                 $this->_redirectdepth++;
330                                                                 $this->lastredirectaddr=$this->_redirectaddr;
331                                                                 if( strpos( $this->_redirectaddr, "?" ) > 0 )
332                                                                         $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
333                                                                 else
334                                                                         $this->submit($this->_redirectaddr,$formvars, $formfiles);
335                                                         }
336                                                 }
337                                         }
338
339                                         if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
340                                         {
341                                                 $frameurls = $this->_frameurls;
342                                                 $this->_frameurls = array();
343
344                                                 while(list(,$frameurl) = each($frameurls))
345                                                 {
346                                                         if($this->_framedepth < $this->maxframes)
347                                                         {
348                                                                 $this->fetch($frameurl);
349                                                                 $this->_framedepth++;
350                                                         }
351                                                         else
352                                                                 break;
353                                                 }
354                                         }
355
356                                 }
357                                 else
358                                 {
359                                         return false;
360                                 }
361                                 return true;
362                                 break;
363                         case "https":
364                                 if(!$this->curl_path)
365                                         return false;
366                                 if(function_exists("is_executable"))
367                                     if (!is_executable($this->curl_path))
368                                         return false;
369                                 $this->host = $URI_PARTS["host"];
370                                 if(!empty($URI_PARTS["port"]))
371                                         $this->port = $URI_PARTS["port"];
372                                 if($this->_isproxy)
373                                 {
374                                         // using proxy, send entire URI
375                                         $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
376                                 }
377                                 else
378                                 {
379                                         $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
380                                         // no proxy, send only the path
381                                         $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
382                                 }
383
384                                 if($this->_redirectaddr)
385                                 {
386                                         /* url was redirected, check if we've hit the max depth */
387                                         if($this->maxredirs > $this->_redirectdepth)
388                                         {
389                                                 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
390                                                         $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
391
392                                                 // only follow redirect if it's on this site, or offsiteok is true
393                                                 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
394                                                 {
395                                                         /* follow the redirect */
396                                                         $this->_redirectdepth++;
397                                                         $this->lastredirectaddr=$this->_redirectaddr;
398                                                         if( strpos( $this->_redirectaddr, "?" ) > 0 )
399                                                                 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
400                                                         else
401                                                                 $this->submit($this->_redirectaddr,$formvars, $formfiles);
402                                                 }
403                                         }
404                                 }
405
406                                 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
407                                 {
408                                         $frameurls = $this->_frameurls;
409                                         $this->_frameurls = array();
410
411                                         while(list(,$frameurl) = each($frameurls))
412                                         {
413                                                 if($this->_framedepth < $this->maxframes)
414                                                 {
415                                                         $this->fetch($frameurl);
416                                                         $this->_framedepth++;
417                                                 }
418                                                 else
419                                                         break;
420                                         }
421                                 }
422                                 return true;
423                                 break;
424
425                         default:
426                                 // not a valid protocol
427                                 $this->error    =       'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
428                                 return false;
429                                 break;
430                 }
431                 return true;
432         }
433
434 /*======================================================================*\
435         Function:       fetchlinks
436         Purpose:        fetch the links from a web page
437         Input:          $URI    where you are fetching from
438         Output:         $this->results  an array of the URLs
439 \*======================================================================*/
440
441         function fetchlinks($URI)
442         {
443                 if ($this->fetch($URI))
444                 {
445                         if($this->lastredirectaddr)
446                                 $URI = $this->lastredirectaddr;
447                         if(is_array($this->results))
448                         {
449                                 for($x=0;$x<count($this->results);$x++)
450                                         $this->results[$x] = $this->_striplinks($this->results[$x]);
451                         }
452                         else
453                                 $this->results = $this->_striplinks($this->results);
454
455                         if($this->expandlinks)
456                                 $this->results = $this->_expandlinks($this->results, $URI);
457                         return true;
458                 }
459                 else
460                         return false;
461         }
462
463 /*======================================================================*\
464         Function:       fetchform
465         Purpose:        fetch the form elements from a web page
466         Input:          $URI    where you are fetching from
467         Output:         $this->results  the resulting html form
468 \*======================================================================*/
469
470         function fetchform($URI)
471         {
472
473                 if ($this->fetch($URI))
474                 {
475
476                         if(is_array($this->results))
477                         {
478                                 for($x=0;$x<count($this->results);$x++)
479                                         $this->results[$x] = $this->_stripform($this->results[$x]);
480                         }
481                         else
482                                 $this->results = $this->_stripform($this->results);
483
484                         return true;
485                 }
486                 else
487                         return false;
488         }
489
490
491 /*======================================================================*\
492         Function:       fetchtext
493         Purpose:        fetch the text from a web page, stripping the links
494         Input:          $URI    where you are fetching from
495         Output:         $this->results  the text from the web page
496 \*======================================================================*/
497
498         function fetchtext($URI)
499         {
500                 if($this->fetch($URI))
501                 {
502                         if(is_array($this->results))
503                         {
504                                 for($x=0;$x<count($this->results);$x++)
505                                         $this->results[$x] = $this->_striptext($this->results[$x]);
506                         }
507                         else
508                                 $this->results = $this->_striptext($this->results);
509                         return true;
510                 }
511                 else
512                         return false;
513         }
514
515 /*======================================================================*\
516         Function:       submitlinks
517         Purpose:        grab links from a form submission
518         Input:          $URI    where you are submitting from
519         Output:         $this->results  an array of the links from the post
520 \*======================================================================*/
521
522         function submitlinks($URI, $formvars="", $formfiles="")
523         {
524                 if($this->submit($URI,$formvars, $formfiles))
525                 {
526                         if($this->lastredirectaddr)
527                                 $URI = $this->lastredirectaddr;
528                         if(is_array($this->results))
529                         {
530                                 for($x=0;$x<count($this->results);$x++)
531                                 {
532                                         $this->results[$x] = $this->_striplinks($this->results[$x]);
533                                         if($this->expandlinks)
534                                                 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
535                                 }
536                         }
537                         else
538                         {
539                                 $this->results = $this->_striplinks($this->results);
540                                 if($this->expandlinks)
541                                         $this->results = $this->_expandlinks($this->results,$URI);
542                         }
543                         return true;
544                 }
545                 else
546                         return false;
547         }
548
549 /*======================================================================*\
550         Function:       submittext
551         Purpose:        grab text from a form submission
552         Input:          $URI    where you are submitting from
553         Output:         $this->results  the text from the web page
554 \*======================================================================*/
555
556         function submittext($URI, $formvars = "", $formfiles = "")
557         {
558                 if($this->submit($URI,$formvars, $formfiles))
559                 {
560                         if($this->lastredirectaddr)
561                                 $URI = $this->lastredirectaddr;
562                         if(is_array($this->results))
563                         {
564                                 for($x=0;$x<count($this->results);$x++)
565                                 {
566                                         $this->results[$x] = $this->_striptext($this->results[$x]);
567                                         if($this->expandlinks)
568                                                 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
569                                 }
570                         }
571                         else
572                         {
573                                 $this->results = $this->_striptext($this->results);
574                                 if($this->expandlinks)
575                                         $this->results = $this->_expandlinks($this->results,$URI);
576                         }
577                         return true;
578                 }
579                 else
580                         return false;
581         }
582
583
584
585 /*======================================================================*\
586         Function:       set_submit_multipart
587         Purpose:        Set the form submission content type to
588                                 multipart/form-data
589 \*======================================================================*/
590         function set_submit_multipart()
591         {
592                 $this->_submit_type = "multipart/form-data";
593         }
594
595
596 /*======================================================================*\
597         Function:       set_submit_normal
598         Purpose:        Set the form submission content type to
599                                 application/x-www-form-urlencoded
600 \*======================================================================*/
601         function set_submit_normal()
602         {
603                 $this->_submit_type = "application/x-www-form-urlencoded";
604         }
605
606
607
608
609 /*======================================================================*\
610         Private functions
611 \*======================================================================*/
612
613
614 /*======================================================================*\
615         Function:       _striplinks
616         Purpose:        strip the hyperlinks from an html document
617         Input:          $document       document to strip.
618         Output:         $match          an array of the links
619 \*======================================================================*/
620
621         function _striplinks($document)
622         {
623                 preg_match_all("'<\s*a\s.*?href\s*=\s*                  # find <a href=
624                                                 ([\"\'])?                                       # find single or double quote
625                                                 (?(1) (.*?)\\1 | ([^\s\>]+))            # if quote found, match up to next matching
626                                                                                                         # quote, otherwise match up to next space
627                                                 'isx",$document,$links);
628
629
630                 // catenate the non-empty matches from the conditional subpattern
631
632                 while(list($key,$val) = each($links[2]))
633                 {
634                         if(!empty($val))
635                                 $match[] = $val;
636                 }
637
638                 while(list($key,$val) = each($links[3]))
639                 {
640                         if(!empty($val))
641                                 $match[] = $val;
642                 }
643
644                 // return the links
645                 return $match;
646         }
647
648 /*======================================================================*\
649         Function:       _stripform
650         Purpose:        strip the form elements from an html document
651         Input:          $document       document to strip.
652         Output:         $match          the matched form elements as one string, joined by CRLF
653 \*======================================================================*/
654
655         function _stripform($document)
656         {
657                 preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
658
659                 // catenate the matches
660                 $match = implode("\r\n",$elements[0]);
661
662                 // return the links
663                 return $match;
664         }
665
666
667
668 /*======================================================================*\
669         Function:       _striptext
670         Purpose:        strip the text from an html document
671         Input:          $document       document to strip.
672         Output:         $text           the resulting text
673 \*======================================================================*/
674
675         function _striptext($document)
676         {
677
678                 // I didn't use preg eval (//e) since that is only available in PHP 4.0.
679                 // so, list your entities one by one here. I included some of the
680                 // more common ones.
681
682                 $search = array("'<script[^>]*?>.*?</script>'si",       // strip out javascript
683                                                 "'<[\/\!]*?[^<>]*?>'si",                        // strip out html tags
684                                                 "'([\r\n])[\s]+'",                                      // strip out white space
685                                                 "'&(quot|#34|#034|#x22);'i",            // replace html entities
686                                                 "'&(amp|#38|#038|#x26);'i",                     // added hexadecimal values
687                                                 "'&(lt|#60|#060|#x3c);'i",
688                                                 "'&(gt|#62|#062|#x3e);'i",
689                                                 "'&(nbsp|#160|#xa0);'i",
690                                                 "'&(iexcl|#161);'i",
691                                                 "'&(cent|#162);'i",
692                                                 "'&(pound|#163);'i",
693                                                 "'&(copy|#169);'i",
694                                                 "'&(reg|#174);'i",
695                                                 "'&(deg|#176);'i",
696                                                 "'&(#39|#039|#x27);'",
697                                                 "'&(euro|#8364);'i",                            // europe
698                                                 "'&a(uml|UML);'",                                       // german
699                                                 "'&o(uml|UML);'",
700                                                 "'&u(uml|UML);'",
701                                                 "'&A(uml|UML);'",
702                                                 "'&O(uml|UML);'",
703                                                 "'&U(uml|UML);'",
704                                                 "'&szlig;'i",
705                                                 );
706                 $replace = array(       "",
707                                                         "",
708                                                         "\\1",
709                                                         "\"",
710                                                         "&",
711                                                         "<",
712                                                         ">",
713                                                         " ",
714                                                         chr(161),
715                                                         chr(162),
716                                                         chr(163),
717                                                         chr(169),
718                                                         chr(174),
719                                                         chr(176),
720                                                         chr(39),
721                                                         chr(128),
722                                                         "ä",
723                                                         "ö",
724                                                         "ü",
725                                                         "Ä",
726                                                         "Ö",
727                                                         "Ãœ",
728                                                         "ß",
729                                                 );
730
731                 $text = preg_replace($search,$replace,$document);
732
733                 return $text;
734         }
735
736 /*======================================================================*\
737         Function:       _expandlinks
738         Purpose:        expand each link into a fully qualified URL
739         Input:          $links                  the links to qualify
740                                 $URI                    the full URI to get the base from
741         Output:         $expandedLinks  the expanded links
742 \*======================================================================*/
743
744         function _expandlinks($links,$URI)
745         {
746
747                 preg_match("/^[^\?]+/",$URI,$match);
748
749                 $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
750                 $match = preg_replace("|/$|","",$match);
751                 $match_part = parse_url($match);
752                 $match_root =
753                 $match_part["scheme"]."://".$match_part["host"];
754
755                 $search = array(        "|^http://".preg_quote($this->host)."|i",
756                                                         "|^(\/)|i",
757                                                         "|^(?!http://)(?!mailto:)|i",
758                                                         "|/\./|",
759                                                         "|/[^\/]+/\.\./|"
760                                                 );
761
762                 $replace = array(       "",
763                                                         $match_root."/",
764                                                         $match."/",
765                                                         "/",
766                                                         "/"
767                                                 );
768
769                 $expandedLinks = preg_replace($search,$replace,$links);
770
771                 return $expandedLinks;
772         }
773
774 /*======================================================================*\
775         Function:       _httprequest
776         Purpose:        go get the http data from the server
777         Input:          $url            the url to fetch
778                                 $fp                     the current open file pointer
779                                 $URI            the full URI
780                                 $body           body contents to send if any (POST)
781         Output:
782 \*======================================================================*/
783
784         function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
785         {
786                 $cookie_headers = '';
787                 if($this->passcookies && $this->_redirectaddr)
788                         $this->setcookies();
789
790                 $URI_PARTS = parse_url($URI);
791                 if(empty($url))
792                         $url = "/";
793                 $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
794                 if(!empty($this->agent))
795                         $headers .= "User-Agent: ".$this->agent."\r\n";
796                 if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
797                         $headers .= "Host: ".$this->host;
798                         if(!empty($this->port) && $this->port != 80)
799                                 $headers .= ":".$this->port;
800                         $headers .= "\r\n";
801                 }
802                 if(!empty($this->accept))
803                         $headers .= "Accept: ".$this->accept."\r\n";
804                 if(!empty($this->referer))
805                         $headers .= "Referer: ".$this->referer."\r\n";
806                 if(!empty($this->cookies))
807                 {
808                         if(!is_array($this->cookies))
809                                 $this->cookies = (array)$this->cookies;
810
811                         reset($this->cookies);
812                         if ( count($this->cookies) > 0 ) {
813                                 $cookie_headers .= 'Cookie: ';
814                                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
815                                 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
816                                 }
817                                 $headers .= substr($cookie_headers,0,-2) . "\r\n";
818                         }
819                 }
820                 if(!empty($this->rawheaders))
821                 {
822                         if(!is_array($this->rawheaders))
823                                 $this->rawheaders = (array)$this->rawheaders;
824                         while(list($headerKey,$headerVal) = each($this->rawheaders))
825                                 $headers .= $headerKey.": ".$headerVal."\r\n";
826                 }
827                 if(!empty($content_type)) {
828                         $headers .= "Content-type: $content_type";
829                         if ($content_type == "multipart/form-data")
830                                 $headers .= "; boundary=".$this->_mime_boundary;
831                         $headers .= "\r\n";
832                 }
833                 if(!empty($body))
834                         $headers .= "Content-length: ".strlen($body)."\r\n";
835                 if(!empty($this->user) || !empty($this->pass))
836                         $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
837
838                 //add proxy auth headers
839                 if(!empty($this->proxy_user))
840                         $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
841
842
843                 $headers .= "\r\n";
844
845                 // set the read timeout if needed
846                 if ($this->read_timeout > 0)
847                         socket_set_timeout($fp, $this->read_timeout);
848                 $this->timed_out = false;
849
850                 fwrite($fp,$headers.$body,strlen($headers.$body));
851
852                 $this->_redirectaddr = false;
853                 unset($this->headers);
854
855                 while($currentHeader = fgets($fp,$this->_maxlinelen))
856                 {
857                         if ($this->read_timeout > 0 && $this->_check_timeout($fp))
858                         {
859                                 $this->status=-100;
860                                 return false;
861                         }
862
863                         if($currentHeader == "\r\n")
864                                 break;
865
866                         // if a header begins with Location: or URI:, set the redirect
867                         if(preg_match("/^(Location:|URI:)/i",$currentHeader))
868                         {
869                                 // get URL portion of the redirect
870                                 preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
871                                 // look for :// in the Location header to see if hostname is included
872                                 if(!preg_match("|\:\/\/|",$matches[2]))
873                                 {
874                                         // no host in the path, so prepend
875                                         $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
876                                         // eliminate double slash
877                                         if(!preg_match("|^/|",$matches[2]))
878                                                         $this->_redirectaddr .= "/".$matches[2];
879                                         else
880                                                         $this->_redirectaddr .= $matches[2];
881                                 }
882                                 else
883                                         $this->_redirectaddr = $matches[2];
884                         }
885
886                         if(preg_match("|^HTTP/|",$currentHeader))
887                         {
888                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
889                                 {
890                                         $this->status= $status[1];
891                 }
892                                 $this->response_code = $currentHeader;
893                         }
894
895                         $this->headers[] = $currentHeader;
896                 }
897
898                 $results = '';
899                 do {
900                 $_data = fread($fp, $this->maxlength);
901                 if (strlen($_data) == 0) {
902                         break;
903                 }
904                 $results .= $_data;
905                 } while(true);
906
907                 if ($this->read_timeout > 0 && $this->_check_timeout($fp))
908                 {
909                         $this->status=-100;
910                         return false;
911                 }
912
913                 // check if there is a a redirect meta tag
914
915                 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
916
917                 {
918                         $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
919                 }
920
921                 // have we hit our frame depth and is there frame src to fetch?
922                 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
923                 {
924                         $this->results[] = $results;
925                         for($x=0; $x<count($match[1]); $x++)
926                                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
927                 }
928                 // have we already fetched framed content?
929                 elseif(is_array($this->results))
930                         $this->results[] = $results;
931                 // no framed content
932                 else
933                         $this->results = $results;
934
935                 return true;
936         }
937
938 /*======================================================================*\
939         Function:       _httpsrequest
940         Purpose:        go get the https data from the server using curl
941         Input:          $url            the url to fetch
942                                 $URI            the full URI
943                                 $body           body contents to send if any (POST)
944         Output:
945 \*======================================================================*/
946
947         function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
948         {
949                 if($this->passcookies && $this->_redirectaddr)
950                         $this->setcookies();
951
952                 $headers = array();
953
954                 $URI_PARTS = parse_url($URI);
955                 if(empty($url))
956                         $url = "/";
957                 // GET ... header not needed for curl
958                 //$headers[] = $http_method." ".$url." ".$this->_httpversion;
959                 if(!empty($this->agent))
960                         $headers[] = "User-Agent: ".$this->agent;
961                 if(!empty($this->host))
962                         if(!empty($this->port))
963                                 $headers[] = "Host: ".$this->host.":".$this->port;
964                         else
965                                 $headers[] = "Host: ".$this->host;
966                 if(!empty($this->accept))
967                         $headers[] = "Accept: ".$this->accept;
968                 if(!empty($this->referer))
969                         $headers[] = "Referer: ".$this->referer;
970                 if(!empty($this->cookies))
971                 {
972                         if(!is_array($this->cookies))
973                                 $this->cookies = (array)$this->cookies;
974
975                         reset($this->cookies);
976                         if ( count($this->cookies) > 0 ) {
977                                 $cookie_str = 'Cookie: ';
978                                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
979                                 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
980                                 }
981                                 $headers[] = substr($cookie_str,0,-2);
982                         }
983                 }
984                 if(!empty($this->rawheaders))
985                 {
986                         if(!is_array($this->rawheaders))
987                                 $this->rawheaders = (array)$this->rawheaders;
988                         while(list($headerKey,$headerVal) = each($this->rawheaders))
989                                 $headers[] = $headerKey.": ".$headerVal;
990                 }
991                 if(!empty($content_type)) {
992                         if ($content_type == "multipart/form-data")
993                                 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
994                         else
995                                 $headers[] = "Content-type: $content_type";
996                 }
997                 if(!empty($body))
998                         $headers[] = "Content-length: ".strlen($body);
999                 if(!empty($this->user) || !empty($this->pass))
1000                         $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1001
1002                 for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1003                         $safer_header = strtr( $headers[$curr_header], "\"", " " );
1004                         $cmdline_params .= " -H \"".$safer_header."\"";
1005                 }
1006
1007                 if(!empty($body))
1008                         $cmdline_params .= " -d \"$body\"";
1009
1010                 if($this->read_timeout > 0)
1011                         $cmdline_params .= " -m ".$this->read_timeout;
1012
1013                 $headerfile = tempnam($temp_dir, "sno");
1014
1015                 exec($this->curl_path." -k -D \"$headerfile\"".$cmdline_params." \"".escapeshellcmd($URI)."\"",$results,$return);
1016
1017                 if($return)
1018                 {
1019                         $this->error = "Error: cURL could not retrieve the document, error $return.";
1020                         return false;
1021                 }
1022
1023
1024                 $results = implode("\r\n",$results);
1025
1026                 $result_headers = file("$headerfile");
1027
1028                 $this->_redirectaddr = false;
1029                 unset($this->headers);
1030
1031                 for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1032                 {
1033
1034                         // if a header begins with Location: or URI:, set the redirect
1035                         if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1036                         {
1037                                 // get URL portion of the redirect
1038                                 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1039                                 // look for :// in the Location header to see if hostname is included
1040                                 if(!preg_match("|\:\/\/|",$matches[2]))
1041                                 {
1042                                         // no host in the path, so prepend
1043                                         $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1044                                         // eliminate double slash
1045                                         if(!preg_match("|^/|",$matches[2]))
1046                                                         $this->_redirectaddr .= "/".$matches[2];
1047                                         else
1048                                                         $this->_redirectaddr .= $matches[2];
1049                                 }
1050                                 else
1051                                         $this->_redirectaddr = $matches[2];
1052                         }
1053
1054                         if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1055                                 $this->response_code = $result_headers[$currentHeader];
1056
1057                         $this->headers[] = $result_headers[$currentHeader];
1058                 }
1059
1060                 // check if there is a a redirect meta tag
1061
1062                 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1063                 {
1064                         $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1065                 }
1066
1067                 // have we hit our frame depth and is there frame src to fetch?
1068                 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1069                 {
1070                         $this->results[] = $results;
1071                         for($x=0; $x<count($match[1]); $x++)
1072                                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1073                 }
1074                 // have we already fetched framed content?
1075                 elseif(is_array($this->results))
1076                         $this->results[] = $results;
1077                 // no framed content
1078                 else
1079                         $this->results = $results;
1080
1081                 unlink("$headerfile");
1082
1083                 return true;
1084         }
1085
1086 /*======================================================================*\
1087         Function:       setcookies()
1088         Purpose:        set cookies for a redirection
1089 \*======================================================================*/
1090
1091         function setcookies()
1092         {
1093                 for($x=0; $x<count($this->headers); $x++)
1094                 {
1095                 if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1096                         $this->cookies[$match[1]] = urldecode($match[2]);
1097                 }
1098         }
1099
1100
1101 /*======================================================================*\
1102         Function:       _check_timeout
1103         Purpose:        checks whether timeout has occurred
1104         Input:          $fp     file pointer
1105 \*======================================================================*/
1106
1107         function _check_timeout($fp)
1108         {
1109                 if ($this->read_timeout > 0) {
1110                         $fp_status = socket_get_status($fp);
1111                         if ($fp_status["timed_out"]) {
1112                                 $this->timed_out = true;
1113                                 return true;
1114                         }
1115                 }
1116                 return false;
1117         }
1118
/*======================================================================*\
        Function:       _connect
        Purpose:        make a socket connection
                                Connects to the proxy when both proxy_host and
                                proxy_port are set, otherwise to host:port.
        Input:          $fp     file pointer (passed by reference; receives
                                the result of fsockopen())
        Output:         true on success; false on failure, with
                                $this->status set to the socket error number
                                and $this->error to a description
\*======================================================================*/

        function _connect(&$fp)
        {
                if(!empty($this->proxy_host) && !empty($this->proxy_port))
                {
                        $this->_isproxy = true;

                        $host = $this->proxy_host;
                        $port = $this->proxy_port;
                }
                else
                {
                        $host = $this->host;
                        $port = $this->port;
                }

                $this->status = 0;

                $fp = fsockopen(
                                        $host,
                                        $port,
                                        $errno,
                                        $errstr,
                                        $this->_fp_timeout
                                        );
                if($fp)
                {
                        // socket connection succeeded
                        return true;
                }

                // socket connection failed
                $this->status = $errno;
                switch($errno)
                {
                        // Each case needs its own break: without them every
                        // case fell through to default and the specific
                        // message was always overwritten.
                        case -3:
                                $this->error="socket creation failed (-3)";
                                break;
                        case -4:
                                $this->error="dns lookup failure (-4)";
                                break;
                        case -5:
                                $this->error="connection refused or timed out (-5)";
                                break;
                        default:
                                $this->error="connection failed (".$errno.")";
                                break;
                }
                return false;
        }
/*======================================================================*\
        Function:       _disconnect
        Purpose:        disconnect a socket connection
        Input:          $fp     file pointer
        Output:         the result of fclose() on $fp
\*======================================================================*/

        function _disconnect($fp)
        {
                $closed = fclose($fp);

                return $closed;
        }
1182
1183
/*======================================================================*\
        Function:       _prepare_post_body
        Purpose:        Prepare post body according to encoding type
                                ($this->_submit_type). For multipart bodies a
                                fresh boundary is stored in
                                $this->_mime_boundary.
        Input:          $formvars  - form variables (name => value, or
                                             name => list of values)
                                $formfiles - form upload files (field name =>
                                             path, or list of paths)
        Output:         post body; nothing (null) when there are neither
                                variables nor files; an empty string when the
                                submit type is not one of the two handled
\*======================================================================*/

        function _prepare_post_body($formvars, $formfiles)
        {
                settype($formvars, "array");
                settype($formfiles, "array");
                $postdata = '';

                if (count($formvars) == 0 && count($formfiles) == 0)
                        return;

                // All loops use foreach: each() was deprecated in PHP 7.2 and
                // removed in PHP 8.0, and it also depended on the arrays'
                // internal pointers being at the start.
                switch ($this->_submit_type) {
                        case "application/x-www-form-urlencoded":
                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                // multi-valued field: name[]=v1&name[]=v2&...
                                                foreach ($val as $cur_val) {
                                                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                                                }
                                        } else
                                                $postdata .= urlencode($key)."=".urlencode($val)."&";
                                }
                                break;

                        case "multipart/form-data":
                                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                foreach ($val as $cur_val) {
                                                        $postdata .= "--".$this->_mime_boundary."\r\n";
                                                        // Concatenate the "[]" suffix: inside a double-quoted
                                                        // string "\[" is not an escape sequence, so the old
                                                        // "$key\[\]" sent literal backslashes in the field name.
                                                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                                                        $postdata .= "$cur_val\r\n";
                                                }
                                        } else {
                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                                                $postdata .= "$val\r\n";
                                        }
                                }

                                foreach ($formfiles as $field_name => $file_names) {
                                        settype($file_names, "array");
                                        foreach ($file_names as $file_name) {
                                                // silently skip anything that is not a readable regular file
                                                if (!is_file($file_name) || !is_readable($file_name)) continue;

                                                // file_get_contents() is binary-safe and copes with
                                                // zero-length files, unlike fread($fp, filesize(...)).
                                                $file_content = file_get_contents($file_name);
                                                if ($file_content === false) continue;
                                                $base_name = basename($file_name);

                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                                                $postdata .= "$file_content\r\n";
                                        }
                                }
                                // closing boundary
                                $postdata .= "--".$this->_mime_boundary."--\r\n";
                                break;
                }

                return $postdata;
        }
1254 }
1255 endif;
1256 ?>