]> scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/class-snoopy.php
Wordpress 2.5.1
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2 /**
3  * Snoopy - the PHP net client
4  * @author Monte Ohrt <monte@ispi.net>
5  * @copyright 1999-2000 ispi, all rights reserved
6  * @version 1.01
7  * @license GNU Lesser GPL
8  * @link http://snoopy.sourceforge.net/
9  * @package Snoopy
10  */
11 /*************************************************
12
13 Snoopy - the PHP net client
14 Author: Monte Ohrt <monte@ispi.net>
15 Copyright (c): 1999-2000 ispi, all rights reserved
16 Version: 1.01
17
18  * This library is free software; you can redistribute it and/or
19  * modify it under the terms of the GNU Lesser General Public
20  * License as published by the Free Software Foundation; either
21  * version 2.1 of the License, or (at your option) any later version.
22  *
23  * This library is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26  * Lesser General Public License for more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public
29  * License along with this library; if not, write to the Free Software
30  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
31
32 You may contact the author of Snoopy by e-mail at:
33 monte@ispi.net
34
35 Or, write to:
36 Monte Ohrt
37 CTO, ispi
38 237 S. 70th suite 220
39 Lincoln, NE 68510
40
41 The latest version of Snoopy can be obtained from:
42 http://snoopy.sourceforge.net/
43
44 *************************************************/
45
46 if ( !in_array('Snoopy', get_declared_classes() ) ) :
47 class Snoopy
48 {
49         /**** Public variables ****/
50
51         /* user definable vars */
52
53         var $host                       =       "www.php.net";          // host name we are connecting to
54         var $port                       =       80;                                     // port we are connecting to
55         var $proxy_host         =       "";                                     // proxy host to use
56         var $proxy_port         =       "";                                     // proxy port to use
57         var $proxy_user         =       "";                                     // proxy user to use
58         var $proxy_pass         =       "";                                     // proxy password to use
59
60         var $agent                      =       "Snoopy v1.2.3";        // agent we masquerade as
61         var     $referer                =       "";                                     // referer info to pass
62         var $cookies            =       array();                        // array of cookies to pass
63                                                                                                 // $cookies["username"]="joe";
64         var     $rawheaders             =       array();                        // array of raw headers to send
65                                                                                                 // $rawheaders["Content-type"]="text/html";
66
67         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
68         var $lastredirectaddr   =       "";                             // contains address of last redirected address
69         var     $offsiteok              =       true;                           // allows redirection off-site
70         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
71         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
72                                                                                                 // this only applies to fetchlinks()
73                                                                                                 // submitlinks(), and submittext()
74         var $passcookies        =       true;                           // pass set cookies back through redirects
75                                                                                                 // NOTE: this currently does not respect
76                                                                                                 // dates, domains or paths.
77
78         var     $user                   =       "";                                     // user for http authentication
79         var     $pass                   =       "";                                     // password for http authentication
80
81         // http accept types
82         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
83
84         var $results            =       "";                                     // where the content is put
85
86         var $error                      =       "";                                     // error messages sent here
87         var     $response_code  =       "";                                     // response code returned from server
88         var     $headers                =       array();                        // headers returned from server sent here
89         var     $maxlength              =       8192;                           // max return data length (body)
90         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
91                                                                                                 // supported only since PHP 4 Beta 4
92                                                                                                 // set to 0 to disallow timeouts
93         var $timed_out          =       false;                          // if a read operation timed out
94         var     $status                 =       0;                                      // http request status
95
96         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
97                                                                                                 // has permission to write to.
98                                                                                                 // under Windows, this should be C:\temp
99
100         var     $curl_path              =       "/usr/local/bin/curl";
101                                                                                                 // Snoopy will use cURL for fetching
102                                                                                                 // SSL content if a full system path to
103                                                                                                 // the cURL binary is supplied here.
104                                                                                                 // set to false if you do not have
105                                                                                                 // cURL installed. See http://curl.haxx.se
106                                                                                                 // for details on installing cURL.
107                                                                                                 // Snoopy does *not* use the cURL
108                                                                                                 // library functions built into php,
109                                                                                                 // as these functions are not stable
110                                                                                                 // as of this Snoopy release.
111
112         /**** Private variables ****/
113
114         var     $_maxlinelen    =       4096;                           // max line length (headers)
115
116         var $_httpmethod        =       "GET";                          // default http request method
117         var $_httpversion       =       "HTTP/1.0";                     // default http request version
118         var $_submit_method     =       "POST";                         // default submit method
119         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
120         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
121         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
122         var $_redirectdepth     =       0;                                      // increments on an http redirect
123         var $_frameurls         =       array();                        // frame src urls
124         var $_framedepth        =       0;                                      // increments on frame depth
125
126         var $_isproxy           =       false;                          // set if using a proxy server
127         var $_fp_timeout        =       30;                                     // timeout for socket connection
128
129 /*======================================================================*\
130         Function:       fetch
131         Purpose:        fetch the contents of a web page
132                                 (and possibly other protocols in the
133                                 future like ftp, nntp, gopher, etc.)
134         Input:          $URI    the location of the page to fetch
135         Output:         $this->results  the output text from the fetch
136 \*======================================================================*/
137
/**
 * Fetch the contents of a web page over http or https.
 *
 * Parses $URI, records any user/password embedded in it, issues the request
 * (through _connect()/_httprequest() for http, through _httpsrequest() for
 * https, which requires $curl_path), then follows a pending redirect and
 * fetches collected frame URLs, subject to $maxredirs/$offsiteok and
 * $maxframes.
 *
 * NOTE(review): $this->port is only overwritten when the URI names a port,
 * so a port left over from an earlier call or redirect stays in effect --
 * confirm this is intended.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false if the scheme is not http/https, the http connection
 *              could not be opened, or (https) $curl_path is unset or not
 *              executable; true otherwise. The fetched text is left in
 *              $this->results. Results of nested redirect/frame fetches are
 *              not reflected in the return value.
 */
function fetch($URI)
{
        // parse_url() returns false for seriously malformed URLs; fall back to an
        // empty component list so such input ends in the "Invalid protocol" branch
        // below instead of indexing into a non-array.
        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
                $URI_PARTS = array();

        if (!empty($URI_PARTS["user"]))
                $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
                $this->pass = $URI_PARTS["pass"];

        // components that are absent from the URI are not present in the array at all
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
        $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
        $query  = isset($URI_PARTS["query"]) ? $URI_PARTS["query"] : '';
        $path   = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];

        // request target when no proxy is involved: path plus query string.
        // Compared against '' so that a query of "0" is not dropped.
        $request_path = $path . ($query !== '' ? "?" . $query : "");

        switch (strtolower($scheme))
        {
                case "http":
                        $this->host = $host;
                        if (!empty($URI_PARTS["port"]))
                                $this->port = $URI_PARTS["port"];
                        if (!$this->_connect($fp))
                                return false;

                        // _isproxy is read only after _connect() has run, as before.
                        // using proxy: send entire URI; no proxy: send only the path
                        $this->_httprequest($this->_isproxy ? $URI : $request_path, $fp, $URI, $this->_httpmethod);
                        $this->_disconnect($fp);
                        break;

                case "https":
                        if (!$this->curl_path)
                                return false;
                        if (function_exists("is_executable") && !is_executable($this->curl_path))
                                return false;
                        $this->host = $host;
                        if (!empty($URI_PARTS["port"]))
                                $this->port = $URI_PARTS["port"];

                        // using proxy: send entire URI; no proxy: send only the path
                        $this->_httpsrequest($this->_isproxy ? $URI : $request_path, $URI, $this->_httpmethod);
                        break;

                default:
                        // not a valid protocol (the trailing \n is a literal backslash-n
                        // because the string is single-quoted; kept unchanged for callers)
                        $this->error    =       'Invalid protocol "'.$scheme.'"\n';
                        return false;
        }

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
                // "On this site" means the same host over http or https, followed by
                // a port/path/query/fragment boundary so that "host.evil.tld" does
                // not pass as "host".
                $same_site = preg_match('~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i', $this->_redirectaddr);

                // only follow redirect if it's on this site, or offsiteok is true
                if ($same_site || $this->offsiteok)
                {
                        /* follow the redirect */
                        $this->_redirectdepth++;
                        $this->lastredirectaddr = $this->_redirectaddr;
                        $this->fetch($this->_redirectaddr);
                }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
                // work on a copy and reset the list, since the nested fetches may
                // collect frame URLs of their own
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                // foreach instead of each(): each() no longer exists in PHP 8
                foreach ($frameurls as $frameurl)
                {
                        if ($this->_framedepth >= $this->maxframes)
                                break;

                        $this->fetch($frameurl);
                        $this->_framedepth++;
                }
        }

        return true;
}
276
277 /*======================================================================*\
278         Function:       submit
279         Purpose:        submit an http form
280         Input:          $URI    the location to post the data
281                                 $formvars       the formvars to use.
282                                         format: $formvars["var"] = "val";
283                                 $formfiles  an array of files to submit
284                                         format: $formfiles["var"] = "/dir/filename.ext";
285         Output:         $this->results  the text output from the post
286 \*======================================================================*/
287
/**
 * Submit an http form.
 *
 * Builds the request body with _prepare_post_body(), sends it to $URI using
 * $_submit_method/$_submit_type (through _connect()/_httprequest() for http,
 * through _httpsrequest() for https, which requires $curl_path), then follows
 * a pending redirect and fetches collected frame URLs, subject to
 * $maxredirs/$offsiteok and $maxframes.
 *
 * A followed redirect whose address contains "?" is retrieved with fetch();
 * any other redirect is submitted again with the same form data.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the formvars to use, format: $formvars["var"] = "val";
 * @param mixed  $formfiles an array of files to submit,
 *                          format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false if the scheme is not http/https, the http connection
 *              could not be opened, or (https) $curl_path is unset or not
 *              executable; true otherwise. The text output from the post is
 *              left in $this->results. Results of nested redirect/frame
 *              requests are not reflected in the return value.
 */
function submit($URI, $formvars="", $formfiles="")
{
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        // parse_url() returns false for seriously malformed URLs; fall back to an
        // empty component list so such input ends in the "Invalid protocol" branch
        // below instead of indexing into a non-array.
        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
                $URI_PARTS = array();

        if (!empty($URI_PARTS["user"]))
                $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
                $this->pass = $URI_PARTS["pass"];

        // components that are absent from the URI are not present in the array at all
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
        $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
        $query  = isset($URI_PARTS["query"]) ? $URI_PARTS["query"] : '';
        $path   = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];

        // request target when no proxy is involved: path plus query string.
        // Compared against '' so that a query of "0" is not dropped.
        $request_path = $path . ($query !== '' ? "?" . $query : "");

        switch (strtolower($scheme))
        {
                case "http":
                        $this->host = $host;
                        if (!empty($URI_PARTS["port"]))
                                $this->port = $URI_PARTS["port"];
                        if (!$this->_connect($fp))
                                return false;

                        // _isproxy is read only after _connect() has run, as before.
                        // using proxy: send entire URI; no proxy: send only the path
                        $this->_httprequest($this->_isproxy ? $URI : $request_path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                        $this->_disconnect($fp);
                        break;

                case "https":
                        if (!$this->curl_path)
                                return false;
                        if (function_exists("is_executable") && !is_executable($this->curl_path))
                                return false;
                        $this->host = $host;
                        if (!empty($URI_PARTS["port"]))
                                $this->port = $URI_PARTS["port"];

                        // using proxy: send entire URI; no proxy: send only the path
                        $this->_httpsrequest($this->_isproxy ? $URI : $request_path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                        break;

                default:
                        // not a valid protocol (the trailing \n is a literal backslash-n
                        // because the string is single-quoted; kept unchanged for callers)
                        $this->error    =       'Invalid protocol "'.$scheme.'"\n';
                        return false;
        }

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
                // Expand the redirect against this request's scheme://host only when
                // it is not already an absolute http(s) URL. The previous test matched
                // just the request's own scheme, case-sensitively, so an absolute
                // redirect to the other scheme was passed to _expandlinks() as well.
                if (!preg_match('~^https?://~i', $this->_redirectaddr))
                        $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

                // "On this site" means the same host over http or https, followed by
                // a port/path/query/fragment boundary so that "host.evil.tld" does
                // not pass as "host".
                $same_site = preg_match('~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i', $this->_redirectaddr);

                // only follow redirect if it's on this site, or offsiteok is true
                if ($same_site || $this->offsiteok)
                {
                        /* follow the redirect */
                        $this->_redirectdepth++;
                        $this->lastredirectaddr = $this->_redirectaddr;
                        if (strpos($this->_redirectaddr, "?") > 0)
                                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                        else
                                $this->submit($this->_redirectaddr, $formvars, $formfiles);
                }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
                // work on a copy and reset the list, since the nested fetches may
                // collect frame URLs of their own
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                // foreach instead of each(): each() no longer exists in PHP 8
                foreach ($frameurls as $frameurl)
                {
                        if ($this->_framedepth >= $this->maxframes)
                                break;

                        $this->fetch($frameurl);
                        $this->_framedepth++;
                }
        }

        return true;
}
442
443 /*======================================================================*\
444         Function:       fetchlinks
445         Purpose:        fetch the links from a web page
446         Input:          $URI    where you are fetching from
447         Output:         $this->results  an array of the URLs
448 \*======================================================================*/
449
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * The fetched content is passed through _striplinks(); when $expandlinks is
 * set, the result is then passed through _expandlinks() using the last
 * redirect address (if any) or $URI as the base.
 *
 * @param string $URI where you are fetching from
 * @return bool false when fetch() fails, true otherwise
 */
function fetchlinks($URI)
{
        // nothing to post-process when the fetch itself failed
        if (!$this->fetch($URI))
        {
                return false;
        }

        // links are expanded relative to the redirect target when there was one
        if ($this->lastredirectaddr)
        {
                $URI = $this->lastredirectaddr;
        }

        if (!is_array($this->results))
        {
                $this->results = $this->_striplinks($this->results);
        }
        else
        {
                // results may hold several entries; strip each one in place
                for ($idx = 0; $idx < count($this->results); $idx++)
                {
                        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
                }
        }

        if ($this->expandlinks)
        {
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
}
471
472 /*======================================================================*\
473         Function:       fetchform
474         Purpose:        fetch the form elements from a web page
475         Input:          $URI    where you are fetching from
476         Output:         $this->results  the resulting html form
477 \*======================================================================*/
478
/**
 * Fetch a page and replace $this->results with its form elements.
 *
 * The fetched content (each entry, when results is an array) is passed
 * through _stripform().
 *
 * @param string $URI where you are fetching from
 * @return bool false when fetch() fails, true otherwise
 */
function fetchform($URI)
{
        $fetched = $this->fetch($URI);
        if (!$fetched)
        {
                return false;
        }

        if (is_array($this->results))
        {
                // several result entries: reduce each one to its form markup
                $pos = 0;
                while ($pos < count($this->results))
                {
                        $this->results[$pos] = $this->_stripform($this->results[$pos]);
                        $pos++;
                }
                return true;
        }

        // single result
        $this->results = $this->_stripform($this->results);
        return true;
}
498
499
500 /*======================================================================*\
501         Function:       fetchtext
502         Purpose:        fetch the text from a web page, stripping the links
503         Input:          $URI    where you are fetching from
504         Output:         $this->results  the text from the web page
505 \*======================================================================*/
506
/**
 * Fetch a page and reduce what was fetched to plain text.
 *
 * Calls fetch() on $URI. When that succeeds, $this->results (either an
 * array of documents or a single document) is replaced, element by
 * element, with the output of _striptext().
 *
 * @param string $URI where you are fetching from
 * @return bool true when fetch() reported success, false otherwise
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (is_array($this->results))
    {
        // several documents: convert each entry in turn
        $idx = 0;
        while ($idx < count($this->results))
        {
            $this->results[$idx] = $this->_striptext($this->results[$idx]);
            $idx++;
        }
    }
    else
    {
        // single document
        $this->results = $this->_striptext($this->results);
    }

    return true;
}
523
524 /*======================================================================*\
525         Function:       submitlinks
526         Purpose:        grab links from a form submission
527         Input:          $URI    where you are submitting from
528         Output:         $this->results  an array of the links from the post
529 \*======================================================================*/
530
/**
 * Submit a form and reduce the response to the links it contains.
 *
 * Calls submit(); on success every document in $this->results is replaced
 * by the output of _striplinks() and, when $this->expandlinks is set,
 * passed through _expandlinks() as well.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit() unchanged
 * @param mixed  $formfiles form files, handed to submit() unchanged
 * @return bool true when submit() reported success, false otherwise
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // links are expanded against the last redirect address when there is one
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    // single document
    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // several documents: process each entry in turn
    for ($idx = 0; $idx < count($this->results); $idx++)
    {
        $found = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks)
            $found = $this->_expandlinks($found, $base);
        $this->results[$idx] = $found;
    }

    return true;
}
557
558 /*======================================================================*\
559         Function:       submittext
560         Purpose:        grab text from a form submission
561         Input:          $URI    where you are submitting from
562         Output:         $this->results  the text from the web page
563 \*======================================================================*/
564
/**
 * Submit a form and reduce the response to plain text.
 *
 * Calls submit(); on success every document in $this->results is replaced
 * by the output of _striptext() and, when $this->expandlinks is set,
 * passed through _expandlinks() as well.
 *
 * NOTE(review): running _expandlinks() over plain text (rather than over a
 * list of links) looks like a leftover from submitlinks(); behaviour is
 * kept as-is here - confirm whether it is intended.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit() unchanged
 * @param mixed  $formfiles form files, handed to submit() unchanged
 * @return bool true when submit() reported success, false otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // the last redirect address, when there is one, replaces the given URI
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    // single document
    if (!is_array($this->results))
    {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // several documents: process each entry in turn
    for ($idx = 0; $idx < count($this->results); $idx++)
    {
        $plain = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks)
            $plain = $this->_expandlinks($plain, $base);
        $this->results[$idx] = $plain;
    }

    return true;
}
591
592
593
594 /*======================================================================*\
595         Function:       set_submit_multipart
596         Purpose:        Set the form submission content type to
597                                 multipart/form-data
598 \*======================================================================*/
/**
 * Select multipart/form-data as the form submission content type.
 *
 * Only stores the value in $this->_submit_type.
 */
function set_submit_multipart()
{
    $type = "multipart/form-data";
    $this->_submit_type = $type;
}
603
604
605 /*======================================================================*\
606         Function:       set_submit_normal
607         Purpose:        Set the form submission content type to
608                                 application/x-www-form-urlencoded
609 \*======================================================================*/
/**
 * Select application/x-www-form-urlencoded as the form submission
 * content type.
 *
 * Only stores the value in $this->_submit_type.
 */
function set_submit_normal()
{
    $type = "application/x-www-form-urlencoded";
    $this->_submit_type = $type;
}
614
615
616
617
618 /*======================================================================*\
619         Private functions
620 \*======================================================================*/
621
622
623 /*======================================================================*\
624         Function:       _striplinks
625         Purpose:        strip the hyperlinks from an html document
626         Input:          $document       document to strip.
627         Output:         $match          an array of the links
628 \*======================================================================*/
629
/**
 * Extract the href targets of the <a> tags in an HTML document.
 *
 * Quoted href values are collected first and unquoted ones afterwards, so
 * the result is not necessarily in document order.
 *
 * @param string $document document to strip.
 * @return array list of link targets; an empty array when none are found
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // always hand back an array, even when the document contains no links
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds the quoted values, group 3 the unquoted ones
    foreach (array(2, 3) as $group)
    {
        if (empty($links[$group]))
            continue;

        foreach ($links[$group] as $val)
        {
            // compare against '' rather than using empty(), so that a
            // link target of "0" is not thrown away
            if ($val !== '')
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
656
657 /*======================================================================*\
658         Function:       _stripform
659         Purpose:        strip the form elements from an html document
660         Input:          $document       document to strip.
661         Output:         $match          the matched form elements, joined by CRLF into one string
662 \*======================================================================*/
663
/**
 * Reduce an HTML document to its form-related markup.
 *
 * Collects every match of the form-element pattern (FORM, INPUT, SELECT,
 * TEXTAREA and OPTION tags) and joins the matches with CRLF.
 *
 * @param string $document document to strip.
 * @return string the matched form elements, one per line; "" when none match
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $elements);

    // glue the full matches together, one element per line
    return implode("\r\n", $elements[0]);
}
674
675
676
677 /*======================================================================*\
678         Function:       _striptext
679         Purpose:        strip the text from an html document
680         Input:          $document       document to strip.
681         Output:         $text           the resulting text
682 \*======================================================================*/
683
/**
 * Reduce an HTML document to text.
 *
 * Removes <script> blocks and all tags, collapses whitespace that follows
 * a line break, and replaces a fixed list of HTML entities with their
 * characters. The patterns are applied in the order listed.
 *
 * NOTE(review): "&amp;" is decoded before "&lt;"/"&gt;"/"&nbsp;", so input
 * such as "&amp;lt;" ends up decoded twice. Left unchanged here.
 *
 * @param string $document document to strip.
 * @return string the resulting text
 */
function _striptext($document)
{

    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    "'&(quot|#34|#034|#x22);'i",        // replace html entities
                    "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
                    "'&(lt|#60|#060|#x3c);'i",
                    "'&(gt|#62|#062|#x3e);'i",
                    "'&(nbsp|#160|#xa0);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    "'&(reg|#174);'i",
                    "'&(deg|#176);'i",
                    "'&(#39|#039|#x27);'",
                    "'&(euro|#8364);'i",                // europe
                    "'&a(uml|UML);'",                   // german
                    "'&o(uml|UML);'",
                    "'&u(uml|UML);'",
                    "'&A(uml|UML);'",
                    "'&O(uml|UML);'",
                    "'&U(uml|UML);'",
                    "'&szlig;'i",
                    );

    // one replacement per pattern above, in the same order.
    // NOTE(review): the chr() entries are single bytes, while the umlaut
    // literals are stored in whatever encoding this file uses.
    $replace = array(   "",
                        "",
                        "\\1",
                        "\"",
                        "&",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169),
                        chr(174),
                        chr(176),
                        chr(39),
                        chr(128),
                        "ä",
                        "ö",
                        "ü",
                        "Ä",
                        "Ö",
                        "Ü",    // was the mis-encoded sequence "Ãœ"
                        "ß",
                    );

    $text = preg_replace($search,$replace,$document);

    return $text;
}
744
745 /*======================================================================*\
746         Function:       _expandlinks
747         Purpose:        expand each link into a fully qualified URL
748         Input:          $links                  the links to qualify
749                                 $URI                    the full URI to get the base from
750         Output:         $expandedLinks  the expanded links
751 \*======================================================================*/
752
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is derived from $URI: the query string is dropped, then a
 * trailing "/name.ext" component and a trailing slash are removed. Links
 * starting with "/" are prefixed with scheme://host of that base; links
 * that are neither absolute http(s) URLs nor mailto: links are prefixed
 * with the base itself. "/./" and "/dir/../" sequences are collapsed.
 *
 * @param string|array $links the links to qualify
 * @param string       $URI   the full URI to get the base from
 * @return string|array the expanded links (same shape as $links)
 */
function _expandlinks($links,$URI)
{
    // keep only the part of the URI before any query string
    preg_match("/^[^\?]+/",$URI,$match);
    $base = isset($match[0]) ? $match[0] : "";

    // drop a trailing "/name.ext"; the (?<!:/) lookbehind stops the
    // "//host.tld" of a path-less URI (http://example.com) from being
    // mistaken for a file name
    $base = preg_replace("|(?<!:/)/[^\/\.]+\.[^\/\.]+$|","",$base);
    $base = preg_replace("|/$|","",$base);

    // scheme://host of the base, used for root-relative links
    $base_part = parse_url($base);
    $scheme = (is_array($base_part) && isset($base_part["scheme"])) ? $base_part["scheme"] : "";
    $host = (is_array($base_part) && isset($base_part["host"])) ? $base_part["host"] : "";
    $base_root = $scheme."://".$host;

    $search = array(    "|^http://".preg_quote($this->host, "|")."|i",  // own host: make root-relative
                        "|^(\/)|i",                                     // root-relative link
                        "|^(?!https?://)(?!mailto:)|i",                 // anything that is not absolute
                        "|/\./|",
                        "|/[^\/]+/\.\./|"
                    );

    $replace = array(   "",
                        $base_root."/",
                        $base."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
782
783 /*======================================================================*\
784         Function:       _httprequest
785         Purpose:        go get the http data from the server
786         Input:          $url            the url to fetch
787                                 $fp                     the current open file pointer
788                                 $URI            the full URI
789                                 $body           body contents to send if any (POST)
790         Output:
791 \*======================================================================*/
792
/**
 * Send one HTTP request over an already-open socket and read the response.
 *
 * Builds the request line and headers from the object's settings, writes
 * them together with $body to $fp, then reads the response headers and
 * body. Sets $this->headers, $this->status, $this->response_code,
 * $this->_redirectaddr, $this->timed_out, $this->_frameurls and
 * $this->results along the way.
 *
 * @param string   $url          the url to fetch ("/" is used when empty)
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  method placed on the request line
 * @param string   $content_type Content-type header value, if any
 * @param string   $body         body contents to send if any (POST)
 * @return bool false when a read timeout was detected (status is set to
 *              -100), true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    $cookie_headers = '';
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a Host entry in rawheaders takes precedence over the generated one
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8 and,
        // without a reset(), skipped the raw headers on follow-up requests
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";


    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    unset($this->headers);

    // read the response headers, one line at a time, up to the blank line
    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // The value is taken in the same match, and the space after the
        // colon is optional, so $matches[2] is always defined here.
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: remember the numeric code and the raw line
        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of maxlength until nothing more arrives
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))

    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
946
947 /*======================================================================*\
948         Function:       _httpsrequest
949         Purpose:        go get the https data from the server using curl
950         Input:          $url            the url to fetch
951                                 $URI            the full URI
952                                 $body           body contents to send if any (POST)
953         Output:
954 \*======================================================================*/
955
/**
 * Fetch an https URI by running the external curl binary.
 *
 * Builds the request headers from the object's settings, runs
 * $this->curl_path with the response headers dumped to a temporary file,
 * then parses those headers and the returned document. Sets
 * $this->headers, $this->status, $this->response_code,
 * $this->_redirectaddr, $this->_frameurls and $this->results, and
 * $this->error on failure.
 *
 * $url and $http_method are accepted for symmetry with _httprequest() but
 * are not used: no request line is built here, only $URI is passed to curl.
 *
 * Every value placed on the command line goes through escapeshellarg().
 * NOTE(review): $this->curl_path itself is used as configured and is not
 * escaped.
 *
 * @param string $url          the url to fetch (unused)
 * @param string $URI          the full URI
 * @param string $http_method  request method (unused)
 * @param string $content_type Content-type header value, if any
 * @param string $body         body contents to send if any (POST)
 * @return bool false when the temporary file cannot be created or curl
 *              exits with a non-zero status, true otherwise
 */
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);

    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(), which no longer exists in PHP 8
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Build the curl arguments. Each value is quoted on its own with
    // escapeshellarg(); running escapeshellcmd() over the whole command
    // put backslashes in front of "?" and "&" inside the quoted URI and
    // POST body, altering the data that was sent.
    $cmdline_params = "";
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // use the configured temp dir when the object has one; with "" tempnam()
    // falls back to the system temporary directory
    $temp_dir = isset($this->temp_dir) ? $this->temp_dir : "";
    $headerfile = tempnam($temp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the temporary header file behind on failure
        if(file_exists($headerfile))
            unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }


    $results = implode("\r\n",$results);

    // the header dump is only needed once, so remove it as soon as it is read
    $result_headers = file($headerfile);
    unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    unset($this->headers);

    foreach($result_headers as $header_line)
    {

        // if a header begins with Location: or URI:, set the redirect.
        // Same case-insensitive pattern as the check itself; the previous
        // extraction pattern demanded extra whitespace after "Location: "
        // and therefore never captured the target.
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($header_line),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: remember the numeric code and the raw line,
        // as _httprequest() does
        if(preg_match("|^HTTP/|",$header_line))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
                $this->status = $status[1];
            $this->response_code = $header_line;
        }

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
1095
1096 /*======================================================================*\
1097         Function:       setcookies()
1098         Purpose:        set cookies for a redirection
1099 \*======================================================================*/
1100
/**
 * Set cookies for a redirection.
 *
 * Scans $this->headers for Set-Cookie lines and stores each name with its
 * url-decoded value in $this->cookies. Only the part of the value before
 * the first ";" is kept, so cookie attributes are ignored.
 */
function setcookies()
{
    // the request methods unset() the header list before reading a
    // response, so it can be missing here
    if(empty($this->headers) || !is_array($this->headers))
        return;

    // cookies may have been configured as a scalar; normalise before
    // adding keys, as the request methods do
    if(!is_array($this->cookies))
        $this->cookies = (array)$this->cookies;

    foreach($this->headers as $header)
    {
        if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header,$match))
            $this->cookies[$match[1]] = urldecode($match[2]);
    }
}
1109
1110
1111 /*======================================================================*\
1112         Function:       _check_timeout
1113         Purpose:        checks whether timeout has occurred
1114         Input:          $fp     file pointer
1115 \*======================================================================*/
1116
/**
 * Check whether a read on the given stream has timed out.
 *
 * Does nothing unless $this->read_timeout is greater than zero. When the
 * stream status reports a timeout, $this->timed_out is set to true.
 *
 * @param resource $fp file pointer
 * @return bool true when a timeout was reported, false otherwise
 */
function _check_timeout($fp)
{
    // timeouts are only tracked when a read timeout is configured
    if (!($this->read_timeout > 0))
        return false;

    $meta = socket_get_status($fp);
    if (!$meta["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
1128
1129 /*======================================================================*\
1130         Function:       _connect
1131         Purpose:        make a socket connection
1132         Input:          $fp     file pointer
1133 \*======================================================================*/
1134
1135         function _connect(&$fp)
1136         {
1137                 if(!empty($this->proxy_host) && !empty($this->proxy_port))
1138                         {
1139                                 $this->_isproxy = true;
1140
1141                                 $host = $this->proxy_host;
1142                                 $port = $this->proxy_port;
1143                         }
1144                 else
1145                 {
1146                         $host = $this->host;
1147                         $port = $this->port;
1148                 }
1149
1150                 $this->status = 0;
1151
1152                 if($fp = fsockopen(
1153                                         $host,
1154                                         $port,
1155                                         $errno,
1156                                         $errstr,
1157                                         $this->_fp_timeout
1158                                         ))
1159                 {
1160                         // socket connection succeeded
1161
1162                         return true;
1163                 }
1164                 else
1165                 {
1166                         // socket connection failed
1167                         $this->status = $errno;
1168                         switch($errno)
1169                         {
1170                                 case -3:
1171                                         $this->error="socket creation failed (-3)";
1172                                 case -4:
1173                                         $this->error="dns lookup failure (-4)";
1174                                 case -5:
1175                                         $this->error="connection refused or timed out (-5)";
1176                                 default:
1177                                         $this->error="connection failed (".$errno.")";
1178                         }
1179                         return false;
1180                 }
1181         }
1182 /*======================================================================*\
1183         Function:       _disconnect
1184         Purpose:        disconnect a socket connection
1185         Input:          $fp     file pointer
1186 \*======================================================================*/
1187
1188         function _disconnect($fp)
1189         {
1190                 return(fclose($fp));
1191         }
1192
1193
1194 /*======================================================================*\
1195         Function:       _prepare_post_body
1196         Purpose:        Prepare post body according to encoding type
1197         Input:          $formvars  - form variables
1198                                 $formfiles - form upload files
1199         Output:         post body
1200 \*======================================================================*/
1201
1202         function _prepare_post_body($formvars, $formfiles)
1203         {
1204                 settype($formvars, "array");
1205                 settype($formfiles, "array");
1206                 $postdata = '';
1207
1208                 if (count($formvars) == 0 && count($formfiles) == 0)
1209                         return;
1210
1211                 switch ($this->_submit_type) {
1212                         case "application/x-www-form-urlencoded":
1213                                 reset($formvars);
1214                                 while(list($key,$val) = each($formvars)) {
1215                                         if (is_array($val) || is_object($val)) {
1216                                                 while (list($cur_key, $cur_val) = each($val)) {
1217                                                         $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1218                                                 }
1219                                         } else
1220                                                 $postdata .= urlencode($key)."=".urlencode($val)."&";
1221                                 }
1222                                 break;
1223
1224                         case "multipart/form-data":
1225                                 $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1226
1227                                 reset($formvars);
1228                                 while(list($key,$val) = each($formvars)) {
1229                                         if (is_array($val) || is_object($val)) {
1230                                                 while (list($cur_key, $cur_val) = each($val)) {
1231                                                         $postdata .= "--".$this->_mime_boundary."\r\n";
1232                                                         $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1233                                                         $postdata .= "$cur_val\r\n";
1234                                                 }
1235                                         } else {
1236                                                 $postdata .= "--".$this->_mime_boundary."\r\n";
1237                                                 $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1238                                                 $postdata .= "$val\r\n";
1239                                         }
1240                                 }
1241
1242                                 reset($formfiles);
1243                                 while (list($field_name, $file_names) = each($formfiles)) {
1244                                         settype($file_names, "array");
1245                                         while (list(, $file_name) = each($file_names)) {
1246                                                 if (!is_readable($file_name)) continue;
1247
1248                                                 $fp = fopen($file_name, "r");
1249                                                 while (!feof($fp)) {
1250                                                         $file_content .= fread($fp, filesize($file_name));
1251                                                 }
1252                                                 fclose($fp);
1253                                                 $base_name = basename($file_name);
1254
1255                                                 $postdata .= "--".$this->_mime_boundary."\r\n";
1256                                                 $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1257                                                 $postdata .= "$file_content\r\n";
1258                                         }
1259                                 }
1260                                 $postdata .= "--".$this->_mime_boundary."--\r\n";
1261                                 break;
1262                 }
1263
1264                 return $postdata;
1265         }
1266 }
1267 endif;
1268
1269 ?>