scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/class-snoopy.php
Wordpress 2.0.4
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2
3 /*************************************************
4
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2000 ispi, all rights reserved
8 Version: 1.01
9
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24 You may contact the author of Snoopy by e-mail at:
25 monte@ispi.net
26
27 Or, write to:
28 Monte Ohrt
29 CTO, ispi
30 237 S. 70th suite 220
31 Lincoln, NE 68510
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.net/
35
36 *************************************************/
37
38 if ( !in_array('Snoopy', get_declared_classes() ) ) :
39 class Snoopy
40 {
41         /**** Public variables ****/
42         
43         /* user definable vars */
44
45         var $host                       =       "www.php.net";          // host name we are connecting to
46         var $port                       =       80;                                     // port we are connecting to
47         var $proxy_host         =       "";                                     // proxy host to use
48         var $proxy_port         =       "";                                     // proxy port to use
49         var $proxy_user         =       "";                                     // proxy user to use
50         var $proxy_pass         =       "";                                     // proxy password to use
51         
52         var $agent                      =       "Snoopy v1.2.3";        // agent we masquerade as
53         var     $referer                =       "";                                     // referer info to pass
54         var $cookies            =       array();                        // array of cookies to pass
55                                                                                                 // $cookies["username"]="joe";
56         var     $rawheaders             =       array();                        // array of raw headers to send
57                                                                                                 // $rawheaders["Content-type"]="text/html";
58
59         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
60         var $lastredirectaddr   =       "";                             // contains address of last redirected address
61         var     $offsiteok              =       true;                           // allows redirection off-site
62         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
63         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
64                                                                                                 // this only applies to fetchlinks()
65                                                                                                 // submitlinks(), and submittext()
66         var $passcookies        =       true;                           // pass set cookies back through redirects
67                                                                                                 // NOTE: this currently does not respect
68                                                                                                 // dates, domains or paths.
69         
70         var     $user                   =       "";                                     // user for http authentication
71         var     $pass                   =       "";                                     // password for http authentication
72         
73         // http accept types
74         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
75         
76         var $results            =       "";                                     // where the content is put
77                 
78         var $error                      =       "";                                     // error messages sent here
79         var     $response_code  =       "";                                     // response code returned from server
80         var     $headers                =       array();                        // headers returned from server sent here
81         var     $maxlength              =       8192;                           // max return data length (body)
82         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
83                                                                                                 // supported only since PHP 4 Beta 4
84                                                                                                 // set to 0 to disallow timeouts
85         var $timed_out          =       false;                          // if a read operation timed out
86         var     $status                 =       0;                                      // http request status
87
88         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
89                                                                                                 // has permission to write to.
90                                                                                                 // under Windows, this should be C:\temp
91
92         var     $curl_path              =       "/usr/local/bin/curl";
93                                                                                                 // Snoopy will use cURL for fetching
94                                                                                                 // SSL content if a full system path to
95                                                                                                 // the cURL binary is supplied here.
96                                                                                                 // set to false if you do not have
97                                                                                                 // cURL installed. See http://curl.haxx.se
98                                                                                                 // for details on installing cURL.
99                                                                                                 // Snoopy does *not* use the cURL
100                                                                                                 // library functions built into php,
101                                                                                                 // as these functions are not stable
102                                                                                                 // as of this Snoopy release.
103         
104         /**** Private variables ****/   
105         
106         var     $_maxlinelen    =       4096;                           // max line length (headers)
107         
108         var $_httpmethod        =       "GET";                          // default http request method
109         var $_httpversion       =       "HTTP/1.0";                     // default http request version
110         var $_submit_method     =       "POST";                         // default submit method
111         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
112         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
113         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
114         var $_redirectdepth     =       0;                                      // increments on an http redirect
115         var $_frameurls         =       array();                        // frame src urls
116         var $_framedepth        =       0;                                      // increments on frame depth
117         
118         var $_isproxy           =       false;                          // set if using a proxy server
119         var $_fp_timeout        =       30;                                     // timeout for socket connection
120
121 /*======================================================================*\
122         Function:       fetch
123         Purpose:        fetch the contents of a web page
124                                 (and possibly other protocols in the
125                                 future like ftp, nntp, gopher, etc.)
126         Input:          $URI    the location of the page to fetch
127         Output:         $this->results  the output text from the fetch
128 \*======================================================================*/
129
/**
 * Fetch the contents of a web page over http or https.
 *
 * Credentials embedded in $URI overwrite $this->user / $this->pass.
 * http requests go through _connect(), _httprequest() and _disconnect();
 * https requests go through _httpsrequest() and require $this->curl_path
 * to name an executable file. Once the request has been made, a pending
 * redirect ($this->_redirectaddr) is followed recursively while
 * $this->_redirectdepth is below $this->maxredirs, and any queued frame
 * URLs ($this->_frameurls) are fetched while $this->_framedepth is below
 * $this->maxframes.
 *
 * @param  string $URI the location of the page to fetch
 * @return bool   false when the connection could not be opened, when the
 *                cURL binary is unavailable for https, or when the scheme
 *                is not http/https ($this->error is set in that last
 *                case); true otherwise. The outcome of nested redirect and
 *                frame fetches is not reflected in the return value.
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);

    // parse_url() returns false for seriously malformed URLs; normalise so
    // that the key lookups below never touch a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // isset() rather than empty(): a query string of "0" is legitimate.
    if (!isset($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // A URI without a scheme falls through to the "invalid protocol" branch
    // instead of raising an undefined-index notice.
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    // Request line used when no proxy is involved: path plus optional query.
    $path = $URI_PARTS["path"];
    if ($URI_PARTS["query"] !== '')
        $path .= "?".$URI_PARTS["query"];

    switch (strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // The proxy flag is read only after _connect() has run, exactly
            // as before, in case _connect() updates it.
            // Through a proxy the entire URI is sent, otherwise only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            // https relies on an external cURL binary.
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;

            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol; "\n" must be double-quoted to be a newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // On-site means http or https on exactly this host: the host must be
        // followed by a port, path, query, fragment or end of string so that
        // "host.example.org" does not pass for "host".
        $onsite = preg_match(
            "~^https?://".preg_quote($this->host, "~").'(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // Work on a snapshot and clear the queue before the nested fetches run.
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces the each()/list() idiom, which PHP 8 removed.
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
268
269 /*======================================================================*\
270         Function:       submit
271         Purpose:        submit an http form
272         Input:          $URI    the location to post the data
273                                 $formvars       the formvars to use.
274                                         format: $formvars["var"] = "val";
275                                 $formfiles  an array of files to submit
276                                         format: $formfiles["var"] = "/dir/filename.ext";
277         Output:         $this->results  the text output from the post
278 \*======================================================================*/
279
/**
 * Submit an http form.
 *
 * The request body is built by _prepare_post_body() from $formvars and
 * $formfiles and sent with $this->_submit_method / $this->_submit_type.
 * Credentials embedded in $URI overwrite $this->user / $this->pass.
 * http requests go through _connect(), _httprequest() and _disconnect();
 * https requests go through _httpsrequest() and require $this->curl_path
 * to name an executable file. A pending redirect is followed while
 * $this->_redirectdepth is below $this->maxredirs: with fetch() when the
 * redirect address contains a "?", otherwise by submitting the same form
 * data again. Queued frame URLs are then fetched while $this->_framedepth
 * is below $this->maxframes.
 *
 * @param  string $URI       the location to post the data
 * @param  mixed  $formvars  the formvars to use, $formvars["var"] = "val"
 * @param  mixed  $formfiles files to submit,
 *                           $formfiles["var"] = "/dir/filename.ext"
 * @return bool   false when the connection could not be opened, when the
 *                cURL binary is unavailable for https, or when the scheme
 *                is not http/https ($this->error is set in that last
 *                case); true otherwise. The outcome of nested redirect and
 *                frame requests is not reflected in the return value.
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);

    // parse_url() returns false for seriously malformed URLs; normalise so
    // that the key lookups below never touch a non-array.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    // isset() rather than empty(): a query string of "0" is legitimate.
    if (!isset($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // A URI without a scheme falls through to the "invalid protocol" branch
    // instead of raising an undefined-index notice.
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    // Request line used when no proxy is involved: path plus optional query.
    $path = $URI_PARTS["path"];
    if ($URI_PARTS["query"] !== '')
        $path .= "?".$URI_PARTS["query"];

    switch (strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // The proxy flag is read only after _connect() has run, exactly
            // as before, in case _connect() updates it.
            // Through a proxy the entire URI is sent, otherwise only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
            break;

        case "https":
            // https relies on an external cURL binary.
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;

            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol; "\n" must be double-quoted to be a newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // A redirect address that does not start with the request's own
        // scheme is handed to _expandlinks() together with scheme://host.
        if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

        // On-site means http or https on exactly this host: the host must be
        // followed by a port, path, query, fragment or end of string so that
        // "host.example.org" does not pass for "host".
        $onsite = preg_match(
            "~^https?://".preg_quote($this->host, "~").'(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;

            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // Work on a snapshot and clear the queue before the nested fetches run.
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces the each()/list() idiom, which PHP 8 removed.
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
434
435 /*======================================================================*\
436         Function:       fetchlinks
437         Purpose:        fetch the links from a web page
438         Input:          $URI    where you are fetching from
439         Output:         $this->results  an array of the URLs
440 \*======================================================================*/
441
/**
 * Fetch a page and replace $this->results with the output of _striplinks().
 *
 * When $this->results is an array, every element is passed through
 * _striplinks() individually. With $this->expandlinks set, the result is
 * additionally passed through _expandlinks() using the last redirect
 * address (when there is one) in place of $URI.
 *
 * @param  string $URI where you are fetching from
 * @return bool   false when fetch() fails, true otherwise
 */
function fetchlinks($URI)
{
    // Nothing to process when the page could not be retrieved.
    if (!$this->fetch($URI))
        return false;

    // After a redirect, the final address takes the place of $URI.
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
    }
    else
    {
        // Walk the numeric indexes 0 .. count-1, one entry at a time.
        $idx = 0;
        while ($idx < count($this->results))
        {
            $this->results[$idx] = $this->_striplinks($this->results[$idx]);
            $idx++;
        }
    }

    if ($this->expandlinks)
        $this->results = $this->_expandlinks($this->results, $URI);

    return true;
}
463
464 /*======================================================================*\
465         Function:       fetchform
466         Purpose:        fetch the form elements from a web page
467         Input:          $URI    where you are fetching from
468         Output:         $this->results  the resulting html form
469 \*======================================================================*/
470
471         function fetchform($URI)
472         {
473                 
474                 if ($this->fetch($URI))
475                 {                       
476
477                         if(is_array($this->results))
478                         {
479                                 for($x=0;$x<count($this->results);$x++)
480                                         $this->results[$x] = $this->_stripform($this->results[$x]);
481                         }
482                         else
483                                 $this->results = $this->_stripform($this->results);
484                         
485                         return true;
486                 }
487                 else
488                         return false;
489         }
490         
491         
492 /*======================================================================*\
493         Function:       fetchtext
494         Purpose:        fetch the text from a web page, stripping the links
495         Input:          $URI    where you are fetching from
496         Output:         $this->results  the text from the web page
497 \*======================================================================*/
498
499         function fetchtext($URI)
500         {
501                 if($this->fetch($URI))
502                 {                       
503                         if(is_array($this->results))
504                         {
505                                 for($x=0;$x<count($this->results);$x++)
506                                         $this->results[$x] = $this->_striptext($this->results[$x]);
507                         }
508                         else
509                                 $this->results = $this->_striptext($this->results);
510                         return true;
511                 }
512                 else
513                         return false;
514         }
515
516 /*======================================================================*\
517         Function:       submitlinks
518         Purpose:        grab links from a form submission
519         Input:          $URI    where you are submitting from
520         Output:         $this->results  an array of the links from the post
521 \*======================================================================*/
522
523         function submitlinks($URI, $formvars="", $formfiles="")
524         {
525                 if($this->submit($URI,$formvars, $formfiles))
526                 {                       
527                         if($this->lastredirectaddr)
528                                 $URI = $this->lastredirectaddr;
529                         if(is_array($this->results))
530                         {
531                                 for($x=0;$x<count($this->results);$x++)
532                                 {
533                                         $this->results[$x] = $this->_striplinks($this->results[$x]);
534                                         if($this->expandlinks)
535                                                 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
536                                 }
537                         }
538                         else
539                         {
540                                 $this->results = $this->_striplinks($this->results);
541                                 if($this->expandlinks)
542                                         $this->results = $this->_expandlinks($this->results,$URI);
543                         }
544                         return true;
545                 }
546                 else
547                         return false;
548         }
549
550 /*======================================================================*\
551         Function:       submittext
552         Purpose:        grab text from a form submission
553         Input:          $URI    where you are submitting from
554         Output:         $this->results  the text from the web page
555 \*======================================================================*/
556
557         function submittext($URI, $formvars = "", $formfiles = "")
558         {
559                 if($this->submit($URI,$formvars, $formfiles))
560                 {                       
561                         if($this->lastredirectaddr)
562                                 $URI = $this->lastredirectaddr;
563                         if(is_array($this->results))
564                         {
565                                 for($x=0;$x<count($this->results);$x++)
566                                 {
567                                         $this->results[$x] = $this->_striptext($this->results[$x]);
568                                         if($this->expandlinks)
569                                                 $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
570                                 }
571                         }
572                         else
573                         {
574                                 $this->results = $this->_striptext($this->results);
575                                 if($this->expandlinks)
576                                         $this->results = $this->_expandlinks($this->results,$URI);
577                         }
578                         return true;
579                 }
580                 else
581                         return false;
582         }
583
584         
585
586 /*======================================================================*\
587         Function:       set_submit_multipart
588         Purpose:        Set the form submission content type to
589                                 multipart/form-data
590 \*======================================================================*/
591         function set_submit_multipart()
592         {
593                 $this->_submit_type = "multipart/form-data";
594         }
595
596         
597 /*======================================================================*\
598         Function:       set_submit_normal
599         Purpose:        Set the form submission content type to
600                                 application/x-www-form-urlencoded
601 \*======================================================================*/
602         function set_submit_normal()
603         {
604                 $this->_submit_type = "application/x-www-form-urlencoded";
605         }
606
607         
608         
609
610 /*======================================================================*\
611         Private functions
612 \*======================================================================*/
613         
614         
615 /*======================================================================*\
616         Function:       _striplinks
617         Purpose:        strip the hyperlinks from an html document
618         Input:          $document       document to strip.
619         Output:         $match          an array of the links
620 \*======================================================================*/
621
622         function _striplinks($document)
623         {       
624                 preg_match_all("'<\s*a\s.*?href\s*=\s*                  # find <a href=
625                                                 ([\"\'])?                                       # find single or double quote
626                                                 (?(1) (.*?)\\1 | ([^\s\>]+))            # if quote found, match up to next matching
627                                                                                                         # quote, otherwise match up to next space
628                                                 'isx",$document,$links);
629                                                 
630
631                 // catenate the non-empty matches from the conditional subpattern
632
633                 while(list($key,$val) = each($links[2]))
634                 {
635                         if(!empty($val))
636                                 $match[] = $val;
637                 }                               
638                 
639                 while(list($key,$val) = each($links[3]))
640                 {
641                         if(!empty($val))
642                                 $match[] = $val;
643                 }               
644                 
645                 // return the links
646                 return $match;
647         }
648
649 /*======================================================================*\
650         Function:       _stripform
651         Purpose:        strip the form elements from an html document
652         Input:          $document       document to strip.
653         Output:         $match          the matched form elements, joined by CRLF
654 \*======================================================================*/
655
656         function _stripform($document)
657         {       
658                 preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
659                 
660                 // catenate the matches
661                 $match = implode("\r\n",$elements[0]);
662                                 
663                 // return the links
664                 return $match;
665         }
666
667         
668         
669 /*======================================================================*\
670         Function:       _striptext
671         Purpose:        strip the text from an html document
672         Input:          $document       document to strip.
673         Output:         $text           the resulting text
674 \*======================================================================*/
675
676         function _striptext($document)
677         {
678                 
679                 // I didn't use preg eval (//e) since that is only available in PHP 4.0.
680                 // so, list your entities one by one here. I included some of the
681                 // more common ones.
682                                                                 
683                 $search = array("'<script[^>]*?>.*?</script>'si",       // strip out javascript
684                                                 "'<[\/\!]*?[^<>]*?>'si",                        // strip out html tags
685                                                 "'([\r\n])[\s]+'",                                      // strip out white space
686                                                 "'&(quot|#34|#034|#x22);'i",            // replace html entities
687                                                 "'&(amp|#38|#038|#x26);'i",                     // added hexadecimal values
688                                                 "'&(lt|#60|#060|#x3c);'i",
689                                                 "'&(gt|#62|#062|#x3e);'i",
690                                                 "'&(nbsp|#160|#xa0);'i",
691                                                 "'&(iexcl|#161);'i",
692                                                 "'&(cent|#162);'i",
693                                                 "'&(pound|#163);'i",
694                                                 "'&(copy|#169);'i",
695                                                 "'&(reg|#174);'i",
696                                                 "'&(deg|#176);'i",
697                                                 "'&(#39|#039|#x27);'",
698                                                 "'&(euro|#8364);'i",                            // europe
699                                                 "'&a(uml|UML);'",                                       // german
700                                                 "'&o(uml|UML);'",
701                                                 "'&u(uml|UML);'",
702                                                 "'&A(uml|UML);'",
703                                                 "'&O(uml|UML);'",
704                                                 "'&U(uml|UML);'",
705                                                 "'&szlig;'i",
706                                                 );
707                 $replace = array(       "",
708                                                         "",
709                                                         "\\1",
710                                                         "\"",
711                                                         "&",
712                                                         "<",
713                                                         ">",
714                                                         " ",
715                                                         chr(161),
716                                                         chr(162),
717                                                         chr(163),
718                                                         chr(169),
719                                                         chr(174),
720                                                         chr(176),
721                                                         chr(39),
722                                                         chr(128),
723                                                         "ä",
724                                                         "ö",
725                                                         "ü",
726                                                         "Ä",
727                                                         "Ö",
728                                                         "Ãœ",
729                                                         "ß",
730                                                 );
731                                         
732                 $text = preg_replace($search,$replace,$document);
733                                                                 
734                 return $text;
735         }
736
737 /*======================================================================*\
738         Function:       _expandlinks
739         Purpose:        expand each link into a fully qualified URL
740         Input:          $links                  the links to qualify
741                                 $URI                    the full URI to get the base from
742         Output:         $expandedLinks  the expanded links
743 \*======================================================================*/
744
745         function _expandlinks($links,$URI)
746         {
747                 
748                 preg_match("/^[^\?]+/",$URI,$match);
749
750                 $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
751                 $match = preg_replace("|/$|","",$match);
752                 $match_part = parse_url($match);
753                 $match_root =
754                 $match_part["scheme"]."://".$match_part["host"];
755                                 
756                 $search = array(        "|^http://".preg_quote($this->host)."|i",
757                                                         "|^(\/)|i",
758                                                         "|^(?!http://)(?!mailto:)|i",
759                                                         "|/\./|",
760                                                         "|/[^\/]+/\.\./|"
761                                                 );
762                                                 
763                 $replace = array(       "",
764                                                         $match_root."/",
765                                                         $match."/",
766                                                         "/",
767                                                         "/"
768                                                 );                      
769                                 
770                 $expandedLinks = preg_replace($search,$replace,$links);
771
772                 return $expandedLinks;
773         }
774
775 /*======================================================================*\
776         Function:       _httprequest
777         Purpose:        go get the http data from the server
778         Input:          $url            the url to fetch
779                                 $fp                     the current open file pointer
780                                 $URI            the full URI
781                                 $body           body contents to send if any (POST)
782         Output:         
783 \*======================================================================*/
784         
785         function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
786         {
787                 $cookie_headers = '';
788                 if($this->passcookies && $this->_redirectaddr)
789                         $this->setcookies();
790                         
791                 $URI_PARTS = parse_url($URI);
792                 if(empty($url))
793                         $url = "/";
794                 $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";                
795                 if(!empty($this->agent))
796                         $headers .= "User-Agent: ".$this->agent."\r\n";
797                 if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
798                         $headers .= "Host: ".$this->host;
799                         if(!empty($this->port))
800                                 $headers .= ":".$this->port;
801                         $headers .= "\r\n";
802                 }
803                 if(!empty($this->accept))
804                         $headers .= "Accept: ".$this->accept."\r\n";
805                 if(!empty($this->referer))
806                         $headers .= "Referer: ".$this->referer."\r\n";
807                 if(!empty($this->cookies))
808                 {                       
809                         if(!is_array($this->cookies))
810                                 $this->cookies = (array)$this->cookies;
811         
812                         reset($this->cookies);
813                         if ( count($this->cookies) > 0 ) {
814                                 $cookie_headers .= 'Cookie: ';
815                                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
816                                 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
817                                 }
818                                 $headers .= substr($cookie_headers,0,-2) . "\r\n";
819                         } 
820                 }
821                 if(!empty($this->rawheaders))
822                 {
823                         if(!is_array($this->rawheaders))
824                                 $this->rawheaders = (array)$this->rawheaders;
825                         while(list($headerKey,$headerVal) = each($this->rawheaders))
826                                 $headers .= $headerKey.": ".$headerVal."\r\n";
827                 }
828                 if(!empty($content_type)) {
829                         $headers .= "Content-type: $content_type";
830                         if ($content_type == "multipart/form-data")
831                                 $headers .= "; boundary=".$this->_mime_boundary;
832                         $headers .= "\r\n";
833                 }
834                 if(!empty($body))       
835                         $headers .= "Content-length: ".strlen($body)."\r\n";
836                 if(!empty($this->user) || !empty($this->pass))  
837                         $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
838                 
839                 //add proxy auth headers
840                 if(!empty($this->proxy_user))   
841                         $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
842
843
844                 $headers .= "\r\n";
845                 
846                 // set the read timeout if needed
847                 if ($this->read_timeout > 0)
848                         socket_set_timeout($fp, $this->read_timeout);
849                 $this->timed_out = false;
850                 
851                 fwrite($fp,$headers.$body,strlen($headers.$body));
852                 
853                 $this->_redirectaddr = false;
854                 unset($this->headers);
855                                                 
856                 while($currentHeader = fgets($fp,$this->_maxlinelen))
857                 {
858                         if ($this->read_timeout > 0 && $this->_check_timeout($fp))
859                         {
860                                 $this->status=-100;
861                                 return false;
862                         }
863                                 
864                         if($currentHeader == "\r\n")
865                                 break;
866                                                 
867                         // if a header begins with Location: or URI:, set the redirect
868                         if(preg_match("/^(Location:|URI:)/i",$currentHeader))
869                         {
870                                 // get URL portion of the redirect
871                                 preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
872                                 // look for :// in the Location header to see if hostname is included
873                                 if(!preg_match("|\:\/\/|",$matches[2]))
874                                 {
875                                         // no host in the path, so prepend
876                                         $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
877                                         // eliminate double slash
878                                         if(!preg_match("|^/|",$matches[2]))
879                                                         $this->_redirectaddr .= "/".$matches[2];
880                                         else
881                                                         $this->_redirectaddr .= $matches[2];
882                                 }
883                                 else
884                                         $this->_redirectaddr = $matches[2];
885                         }
886                 
887                         if(preg_match("|^HTTP/|",$currentHeader))
888                         {
889                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
890                                 {
891                                         $this->status= $status[1];
892                 }                               
893                                 $this->response_code = $currentHeader;
894                         }
895                                 
896                         $this->headers[] = $currentHeader;
897                 }
898
899                 $results = '';
900                 do {
901                 $_data = fread($fp, $this->maxlength);
902                 if (strlen($_data) == 0) {
903                         break;
904                 }
905                 $results .= $_data;
906                 } while(true);
907
908                 if ($this->read_timeout > 0 && $this->_check_timeout($fp))
909                 {
910                         $this->status=-100;
911                         return false;
912                 }
913                 
914                 // check if there is a a redirect meta tag
915                 
916                 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
917
918                 {
919                         $this->_redirectaddr = $this->_expandlinks($match[1],$URI);     
920                 }
921
922                 // have we hit our frame depth and is there frame src to fetch?
923                 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
924                 {
925                         $this->results[] = $results;
926                         for($x=0; $x<count($match[1]); $x++)
927                                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
928                 }
929                 // have we already fetched framed content?
930                 elseif(is_array($this->results))
931                         $this->results[] = $results;
932                 // no framed content
933                 else
934                         $this->results = $results;
935                 
936                 return true;
937         }
938
939 /*======================================================================*\
940         Function:       _httpsrequest
941         Purpose:        go get the https data from the server using curl
942         Input:          $url            the url to fetch
943                                 $URI            the full URI
944                                 $body           body contents to send if any (POST)
945         Output:         
946 \*======================================================================*/
947         
948         function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
949         {
950                 if($this->passcookies && $this->_redirectaddr)
951                         $this->setcookies();
952
953                 $headers = array();             
954                                         
955                 $URI_PARTS = parse_url($URI);
956                 if(empty($url))
957                         $url = "/";
958                 // GET ... header not needed for curl
959                 //$headers[] = $http_method." ".$url." ".$this->_httpversion;           
960                 if(!empty($this->agent))
961                         $headers[] = "User-Agent: ".$this->agent;
962                 if(!empty($this->host))
963                         if(!empty($this->port))
964                                 $headers[] = "Host: ".$this->host.":".$this->port;
965                         else
966                                 $headers[] = "Host: ".$this->host;
967                 if(!empty($this->accept))
968                         $headers[] = "Accept: ".$this->accept;
969                 if(!empty($this->referer))
970                         $headers[] = "Referer: ".$this->referer;
971                 if(!empty($this->cookies))
972                 {                       
973                         if(!is_array($this->cookies))
974                                 $this->cookies = (array)$this->cookies;
975         
976                         reset($this->cookies);
977                         if ( count($this->cookies) > 0 ) {
978                                 $cookie_str = 'Cookie: ';
979                                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
980                                 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
981                                 }
982                                 $headers[] = substr($cookie_str,0,-2);
983                         }
984                 }
985                 if(!empty($this->rawheaders))
986                 {
987                         if(!is_array($this->rawheaders))
988                                 $this->rawheaders = (array)$this->rawheaders;
989                         while(list($headerKey,$headerVal) = each($this->rawheaders))
990                                 $headers[] = $headerKey.": ".$headerVal;
991                 }
992                 if(!empty($content_type)) {
993                         if ($content_type == "multipart/form-data")
994                                 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
995                         else
996                                 $headers[] = "Content-type: $content_type";
997                 }
998                 if(!empty($body))       
999                         $headers[] = "Content-length: ".strlen($body);
1000                 if(!empty($this->user) || !empty($this->pass))  
1001                         $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1002                         
1003                 for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1004                         $safer_header = strtr( $headers[$curr_header], "\"", " " );
1005                         $cmdline_params .= " -H \"".$safer_header."\"";
1006                 }
1007                 
1008                 if(!empty($body))
1009                         $cmdline_params .= " -d \"$body\"";
1010                 
1011                 if($this->read_timeout > 0)
1012                         $cmdline_params .= " -m ".$this->read_timeout;
1013                 
1014                 $headerfile = tempnam($temp_dir, "sno");
1015
1016                 $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
1017                 exec(escapeshellcmd($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\""),$results,$return);
1018                 
1019                 if($return)
1020                 {
1021                         $this->error = "Error: cURL could not retrieve the document, error $return.";
1022                         return false;
1023                 }
1024                         
1025                         
1026                 $results = implode("\r\n",$results);
1027                 
1028                 $result_headers = file("$headerfile");
1029                                                 
1030                 $this->_redirectaddr = false;
1031                 unset($this->headers);
1032                                                 
1033                 for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1034                 {
1035                         
1036                         // if a header begins with Location: or URI:, set the redirect
1037                         if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1038                         {
1039                                 // get URL portion of the redirect
1040                                 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1041                                 // look for :// in the Location header to see if hostname is included
1042                                 if(!preg_match("|\:\/\/|",$matches[2]))
1043                                 {
1044                                         // no host in the path, so prepend
1045                                         $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1046                                         // eliminate double slash
1047                                         if(!preg_match("|^/|",$matches[2]))
1048                                                         $this->_redirectaddr .= "/".$matches[2];
1049                                         else
1050                                                         $this->_redirectaddr .= $matches[2];
1051                                 }
1052                                 else
1053                                         $this->_redirectaddr = $matches[2];
1054                         }
1055                 
1056                         if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1057                                 $this->response_code = $result_headers[$currentHeader];
1058
1059                         $this->headers[] = $result_headers[$currentHeader];
1060                 }
1061
1062                 // check if there is a redirect meta tag
1063                 
1064                 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1065                 {
1066                         $this->_redirectaddr = $this->_expandlinks($match[1],$URI);     
1067                 }
1068
1069                 // have we hit our frame depth and is there frame src to fetch?
1070                 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1071                 {
1072                         $this->results[] = $results;
1073                         for($x=0; $x<count($match[1]); $x++)
1074                                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1075                 }
1076                 // have we already fetched framed content?
1077                 elseif(is_array($this->results))
1078                         $this->results[] = $results;
1079                 // no framed content
1080                 else
1081                         $this->results = $results;
1082
1083                 unlink("$headerfile");
1084                 
1085                 return true;
1086         }
1087
1088 /*======================================================================*\
1089         Function:       setcookies()
1090         Purpose:        set cookies for a redirection
1091 \*======================================================================*/
1092         
function setcookies()
{
	// Walk the stored response headers by index and copy every
	// "Set-Cookie: name=value" pair into $this->cookies so it can be
	// sent along with a following request.
	$total = count($this->headers);
	$idx = 0;
	while($idx < $total)
	{
		// capture group 1 is the cookie name, group 2 the value up to the first ';'
		$found = preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$idx], $parts);
		if($found)
			$this->cookies[$parts[1]] = urldecode($parts[2]);
		$idx++;
	}
}
1101
1102         
1103 /*======================================================================*\
1104         Function:       _check_timeout
1105         Purpose:        checks whether timeout has occurred
1106         Input:          $fp     file pointer
1107 \*======================================================================*/
1108
function _check_timeout($fp)
{
	// The check only applies when a positive read timeout is configured.
	if (!($this->read_timeout > 0))
		return false;

	// Ask the stream whether its last operation timed out.
	$meta = socket_get_status($fp);
	if (!$meta["timed_out"])
		return false;

	// Remember the timeout on the object and report it to the caller.
	$this->timed_out = true;
	return true;
}
1120
1121 /*======================================================================*\
1122         Function:       _connect
1123         Purpose:        make a socket connection
1124         Input:          $fp     file pointer
1125 \*======================================================================*/
1126         
function _connect(&$fp)
{
	// Opens the socket and stores it in the caller's $fp (passed by
	// reference). Returns true on success; on failure returns false and
	// fills $this->status / $this->error.

	// Connect to the proxy only when both its host and port are set;
	// otherwise connect straight to the target host.
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen(
				$host,
				$port,
				$errno,
				$errstr,
				$this->_fp_timeout
				);
	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// Each case ends with break: without it every case fell through
		// to default and the specific message was always overwritten.
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1174 /*======================================================================*\
1175         Function:       _disconnect
1176         Purpose:        disconnect a socket connection
1177         Input:          $fp     file pointer
1178 \*======================================================================*/
1179         
function _disconnect($fp)
{
	// Close the socket and hand fclose()'s result back to the caller.
	$closed = fclose($fp);
	return $closed;
}
1184
1185         
1186 /*======================================================================*\
1187         Function:       _prepare_post_body
1188         Purpose:        Prepare post body according to encoding type
1189         Input:          $formvars  - form variables
1190                                 $formfiles - form upload files
1191         Output:         post body
1192 \*======================================================================*/
1193         
function _prepare_post_body($formvars, $formfiles)
{
	// Builds the request body for a form submission.
	//   $formvars  - field name => value (a value may be an array/object
	//                of values, sent as repeated "name[]" fields)
	//   $formfiles - field name => file path or array of file paths
	//                (only used for multipart/form-data)
	// Returns the encoded body as a string, or nothing (null) when there
	// are neither variables nor files. The encoding is chosen by
	// $this->_submit_type; an unrecognised type yields an empty string.
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			// foreach replaces the each()/list() idiom, which no longer
			// exists in PHP 8.
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			// The boundary is stored on the object as well as used here.
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// Concatenate the "[]" suffix: writing it as \[\]
						// inside a double-quoted string put literal
						// backslashes into the field name.
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					// unreadable files are silently skipped
					if (!is_readable($file_name)) continue;

					// binary mode so the bytes are sent unmodified
					$fp = fopen($file_name, "rb");
					if (!$fp) continue;

					// Accumulate fixed-size chunks: start from an empty
					// string for each file and append, so a trailing
					// empty read cannot wipe the data and an empty file
					// never triggers a zero-length fread().
					$file_content = '';
					while (!feof($fp)) {
						$chunk = fread($fp, 8192);
						if ($chunk === false) break;
						$file_content .= $chunk;
					}
					fclose($fp);
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			// closing boundary marks the end of the multipart body
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1258 }
1259 endif;
1260
1261 ?>