scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/class-snoopy.php
Wordpress 2.6.2
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2 /**
3  * Snoopy - the PHP net client
4  * @author Monte Ohrt <monte@ispi.net>
5  * @copyright 1999-2000 ispi, all rights reserved
6  * @version 1.01
7  * @license GNU Lesser GPL
8  * @link http://snoopy.sourceforge.net/
9  * @package Snoopy
10  */
11
12 if ( !in_array('Snoopy', get_declared_classes() ) ) :
13 /**
14  * Snoopy - the PHP net client
15  *
16  * @author Monte Ohrt <monte@ispi.net>
17  * @copyright (c): 1999-2000 ispi, all rights reserved
18  * @version 1.01
19  *
20  * This library is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU Lesser General Public
22  * License as published by the Free Software Foundation; either
23  * version 2.1 of the License, or (at your option) any later version.
24  *
25  * This library is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28  * Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public
31  * License along with this library; if not, write to the Free Software
32  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
33  *
34  * You may contact the author of Snoopy by e-mail at:
35  * monte@ispi.net
36  *
37  * Or, write to:
38  * Monte Ohrt
39  * CTO, ispi
40  * 237 S. 70th suite 220
41  * Lincoln, NE 68510
42  *
43  * @link http://snoopy.sourceforge.net/ The latest version of Snoopy can be
44  *              obtained
45  */
46 class Snoopy
47 {
48         /**** Public variables ****/
49
50         /* user definable vars */
51
52         var $host                       =       "www.php.net";          // host name we are connecting to
53         var $port                       =       80;                                     // port we are connecting to
54         var $proxy_host         =       "";                                     // proxy host to use
55         var $proxy_port         =       "";                                     // proxy port to use
56         var $proxy_user         =       "";                                     // proxy user to use
57         var $proxy_pass         =       "";                                     // proxy password to use
58
59         var $agent                      =       "Snoopy v1.2.3";        // agent we masquerade as
60         var     $referer                =       "";                                     // referer info to pass
61         var $cookies            =       array();                        // array of cookies to pass
62                                                                                                 // $cookies["username"]="joe";
63         var     $rawheaders             =       array();                        // array of raw headers to send
64                                                                                                 // $rawheaders["Content-type"]="text/html";
65
66         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
67         var $lastredirectaddr   =       "";                             // contains address of last redirected address
68         var     $offsiteok              =       true;                           // allows redirection off-site
69         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
70         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
71                                                                                                 // this only applies to fetchlinks()
72                                                                                                 // submitlinks(), and submittext()
73         var $passcookies        =       true;                           // pass set cookies back through redirects
74                                                                                                 // NOTE: this currently does not respect
75                                                                                                 // dates, domains or paths.
76
77         var     $user                   =       "";                                     // user for http authentication
78         var     $pass                   =       "";                                     // password for http authentication
79
80         // http accept types
81         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
82
83         var $results            =       "";                                     // where the content is put
84
85         var $error                      =       "";                                     // error messages sent here
86         var     $response_code  =       "";                                     // response code returned from server
87         var     $headers                =       array();                        // headers returned from server sent here
88         var     $maxlength              =       8192;                           // max return data length (body)
89         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
90                                                                                                 // supported only since PHP 4 Beta 4
91                                                                                                 // set to 0 to disallow timeouts
92         var $timed_out          =       false;                          // if a read operation timed out
93         var     $status                 =       0;                                      // http request status
94
95         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
96                                                                                                 // has permission to write to.
97                                                                                                 // under Windows, this should be C:\temp
98
99         var     $curl_path              =       "/usr/local/bin/curl";
100                                                                                                 // Snoopy will use cURL for fetching
101                                                                                                 // SSL content if a full system path to
102                                                                                                 // the cURL binary is supplied here.
103                                                                                                 // set to false if you do not have
104                                                                                                 // cURL installed. See http://curl.haxx.se
105                                                                                                 // for details on installing cURL.
106                                                                                                 // Snoopy does *not* use the cURL
107                                                                                                 // library functions built into php,
108                                                                                                 // as these functions are not stable
109                                                                                                 // as of this Snoopy release.
110
111         /**** Private variables ****/
112
113         var     $_maxlinelen    =       4096;                           // max line length (headers)
114
115         var $_httpmethod        =       "GET";                          // default http request method
116         var $_httpversion       =       "HTTP/1.0";                     // default http request version
117         var $_submit_method     =       "POST";                         // default submit method
118         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
119         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
120         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
121         var $_redirectdepth     =       0;                                      // increments on an http redirect
122         var $_frameurls         =       array();                        // frame src urls
123         var $_framedepth        =       0;                                      // increments on frame depth
124
125         var $_isproxy           =       false;                          // set if using a proxy server
126         var $_fp_timeout        =       30;                                     // timeout for socket connection
127
128 /*======================================================================*\
129         Function:       fetch
130         Purpose:        fetch the contents of a web page
131                                 (and possibly other protocols in the
132                                 future like ftp, nntp, gopher, etc.)
133         Input:          $URI    the location of the page to fetch
134         Output:         $this->results  the output text from the fetch
135 \*======================================================================*/
136
/**
 * Fetch a page over http or https and leave the outcome on this object.
 *
 * The request itself is delegated to _httprequest() (http, over the
 * connection opened by _connect()) or to _httpsrequest() (https).
 * Afterwards a pending redirect in $this->_redirectaddr is followed while
 * $this->maxredirs allows it, and the collected $this->_frameurls are
 * fetched while $this->maxframes allows it; both recurse into fetch().
 *
 * @param string $URI the location of the page to fetch
 * @return bool false if the scheme is neither http nor https (in which
 *              case $this->error is set), if the http connection could
 *              not be opened, or if https is requested while
 *              $this->curl_path is empty or not executable; true
 *              otherwise. The outcome of followed redirects and frames
 *              does not influence the return value.
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // every component is optional in the parse_url() result, so default
    // each one instead of reading a key that may not exist
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : '';
    // compare against '' so that a query string of "0" is not dropped
    if (isset($URI_PARTS["query"]) && $URI_PARTS["query"] !== '')
        $path .= "?".$URI_PARTS["query"];

    // a proxy is sent the entire URI, an origin server only the path
    $request_target = $this->_isproxy ? $URI : $path;

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            $this->_httprequest($request_target, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            // https is only available through an executable cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($request_target, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol; "\n" must be double-quoted to be a newline
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // On-site means http or https on exactly this host: the host must
        // be followed by a delimiter or the end of the address, so that
        // "http://example.com.evil.test/" is not taken for "example.com".
        $onsite = preg_match(
            '/^https?:\/\/'.preg_quote($this->host, '/').'(?:[\/:?#]|$)/i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy and clear the list before recursing
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach instead of each(), which no longer exists in PHP 8
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
275
276 /*======================================================================*\
277         Function:       submit
278         Purpose:        submit an http form
279         Input:          $URI    the location to post the data
280                                 $formvars       the formvars to use.
281                                         format: $formvars["var"] = "val";
282                                 $formfiles  an array of files to submit
283                                         format: $formfiles["var"] = "/dir/filename.ext";
284         Output:         $this->results  the text output from the post
285 \*======================================================================*/
286
/**
 * Submit an http form and leave the outcome on this object.
 *
 * The request body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type through _httprequest()
 * (http) or _httpsrequest() (https). A pending redirect is followed while
 * $this->maxredirs allows it: with fetch() when the redirect address
 * contains a "?", otherwise by submitting the same form data again.
 * Collected frame URLs are fetched while $this->maxframes allows it.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables,
 *                          format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit,
 *                          format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false if the scheme is neither http nor https (in which
 *              case $this->error is set), if the http connection could
 *              not be opened, or if https is requested while
 *              $this->curl_path is empty or not executable; true
 *              otherwise. The outcome of followed redirects and frames
 *              does not influence the return value.
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // every component is optional in the parse_url() result, so default
    // each one instead of reading a key that may not exist
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : '';
    // compare against '' so that a query string of "0" is not dropped
    if (isset($URI_PARTS["query"]) && $URI_PARTS["query"] !== '')
        $path .= "?".$URI_PARTS["query"];

    // a proxy is sent the entire URI, an origin server only the path
    $request_target = $this->_isproxy ? $URI : $path;

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            $this->_httprequest($request_target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
            break;

        case "https":
            // https is only available through an executable cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($request_target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol; "\n" must be double-quoted to be a newline
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // Only an address without any scheme is expanded against this
        // request's scheme and host; an absolute address is left untouched
        // even when its scheme differs from the one just used.
        if (!preg_match('|^[a-z][a-z0-9+.\-]*://|i', $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

        // On-site means http or https on exactly this host: the host must
        // be followed by a delimiter or the end of the address, so that
        // "http://example.com.evil.test/" is not taken for "example.com".
        $onsite = preg_match(
            '/^https?:\/\/'.preg_quote($this->host, '/').'(?:[\/:?#]|$)/i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy and clear the list before recursing
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach instead of each(), which no longer exists in PHP 8
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
441
442 /*======================================================================*\
443         Function:       fetchlinks
444         Purpose:        fetch the links from a web page
445         Input:          $URI    where you are fetching from
446         Output:         $this->results  an array of the URLs
447 \*======================================================================*/
448
/**
 * Fetch a page and replace $this->results with its links.
 *
 * After a successful fetch(), $this->results (or each of its elements
 * when it is an array) is passed through _striplinks(). When
 * $this->expandlinks is set the outcome is additionally passed through
 * _expandlinks(), using the last redirect address as its second argument
 * if there is one and $URI otherwise.
 *
 * @param string $URI where you are fetching from
 * @return bool whatever fetch() reported: true on success, false otherwise
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    // a redirect address, when present, takes the place of the requested URI
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
    }
    else
    {
        $index = 0;
        while ($index < count($this->results))
        {
            $this->results[$index] = $this->_striplinks($this->results[$index]);
            $index++;
        }
    }

    if ($this->expandlinks)
        $this->results = $this->_expandlinks($this->results, $base);

    return true;
}
470
471 /*======================================================================*\
472         Function:       fetchform
473         Purpose:        fetch the form elements from a web page
474         Input:          $URI    where you are fetching from
475         Output:         $this->results  the resulting html form
476 \*======================================================================*/
477
/**
 * Fetch a page and replace $this->results with its form elements.
 *
 * After a successful fetch(), $this->results (or each of its elements
 * when it is an array) is passed through _stripform().
 *
 * @param string $URI where you are fetching from
 * @return bool whatever fetch() reported: true on success, false otherwise
 */
function fetchform($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (!is_array($this->results))
    {
        $this->results = $this->_stripform($this->results);
        return true;
    }

    $index = 0;
    while ($index < count($this->results))
    {
        $this->results[$index] = $this->_stripform($this->results[$index]);
        $index++;
    }

    return true;
}
497
498
499 /*======================================================================*\
500         Function:       fetchtext
501         Purpose:        fetch the text from a web page, stripping the links
502         Input:          $URI    where you are fetching from
503         Output:         $this->results  the text from the web page
504 \*======================================================================*/
505
function fetchtext($URI)
{
    // Give up straight away if the document could not be fetched.
    $fetched = $this->fetch($URI);
    if (!$fetched) {
        return false;
    }

    if (is_array($this->results)) {
        // Multiple documents: convert every entry to plain text in place.
        $pos = 0;
        while ($pos < count($this->results)) {
            $this->results[$pos] = $this->_striptext($this->results[$pos]);
            $pos++;
        }
    } else {
        // Single document: replace it with its plain-text form.
        $this->results = $this->_striptext($this->results);
    }

    return true;
}
522
523 /*======================================================================*\
524         Function:       submitlinks
525         Purpose:        grab links from a form submission
526         Input:          $URI    where you are submitting from
527         Output:         $this->results  an array of the links from the post
528 \*======================================================================*/
529
function submitlinks($URI, $formvars="", $formfiles="")
{
    // Without a successful submission there is nothing to parse.
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // When a redirect address was recorded, links are expanded relative
    // to it instead of the address originally submitted to.
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    // Several documents: extract (and optionally expand) the links of each.
    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        }
        $idx++;
    }

    return true;
}
556
557 /*======================================================================*\
558         Function:       submittext
559         Purpose:        grab text from a form submission
560         Input:          $URI    where you are submitting from
561         Output:         $this->results  the text from the web page
562 \*======================================================================*/
563
function submittext($URI, $formvars = "", $formfiles = "")
{
    // Without a successful submission there is nothing to convert.
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // When a redirect address was recorded, it becomes the base for
    // link expansion instead of the address originally submitted to.
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    // Several documents: convert each to text, then run the optional
    // expansion step on the converted text.
    $idx = 0;
    while ($idx < count($this->results)) {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks) {
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        }
        $idx++;
    }

    return true;
}
590
591
592
593 /*======================================================================*\
594         Function:       set_submit_multipart
595         Purpose:        Set the form submission content type to
596                                 multipart/form-data
597 \*======================================================================*/
function set_submit_multipart()
{
    // Subsequent form submissions are encoded as multipart/form-data.
    $type = 'multipart/form-data';
    $this->_submit_type = $type;
}
602
603
604 /*======================================================================*\
605         Function:       set_submit_normal
606         Purpose:        Set the form submission content type to
607                                 application/x-www-form-urlencoded
608 \*======================================================================*/
function set_submit_normal()
{
    // Subsequent form submissions use the URL-encoded content type.
    $type = 'application/x-www-form-urlencoded';
    $this->_submit_type = $type;
}
613
614
615
616
617 /*======================================================================*\
618         Private functions
619 \*======================================================================*/
620
621
622 /*======================================================================*\
623         Function:       _striplinks
624         Purpose:        strip the hyperlinks from an html document
625         Input:          $document       document to strip.
626         Output:         $match          an array of the links
627 \*======================================================================*/
628
function _striplinks($document)
{
    // Capture group 2 receives quoted href values, group 3 unquoted ones.
    preg_match_all("'<\s*a\s.*?href\s*=\s*                  # find <a href=
                                    ([\"\'])?                                       # find single or double quote
                                    (?(1) (.*?)\\1 | ([^\s\>]+))            # if quote found, match up to next matching
                                                                                            # quote, otherwise match up to next space
                                    'isx",$document,$links);

    // Always hand back an array, even when the document has no links.
    $match = array();

    // Concatenate the non-empty captures: all quoted hrefs first, then all
    // unquoted ones (the same ordering the each()-based loops produced).
    // foreach replaces each(), which no longer exists in PHP 8.
    foreach (array(2, 3) as $group)
    {
        if (empty($links[$group]))
            continue;

        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
655
656 /*======================================================================*\
657         Function:       _stripform
658         Purpose:        strip the form elements from an html document
659         Input:          $document       document to strip.
660         Output:         $match          the matched form elements, joined with CRLF
661 \*======================================================================*/
662
function _stripform($document)
{
    // Matches FORM, INPUT, SELECT, TEXTAREA and OPTION tags (opening or
    // closing); for OPTION tags the text up to the next option/select tag
    // is included in the match.
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $found);

    // Join the full matches, one element per CRLF-terminated line.
    return implode("\r\n", $found[0]);
}
673
674
675
676 /*======================================================================*\
677         Function:       _striptext
678         Purpose:        strip the text from an html document
679         Input:          $document       document to strip.
680         Output:         $text           the resulting text
681 \*======================================================================*/
682
function _striptext($document)
{
    // Ordered pattern => replacement table. preg_replace() applies the
    // entries one after another, so the order matters:
    //   1. scripts and tags are removed,
    //   2. whitespace following a line break is collapsed,
    //   3. the listed entities are decoded,
    //   4. "&amp;" is decoded LAST, so an escaped entity such as "&amp;lt;"
    //      becomes the text "&lt;" instead of being decoded twice into "<".
    // Entities are listed one by one (no preg eval); only common ones are covered.
    // NOTE(review): the chr() replacements emit single-byte values
    // (Latin-1 / Windows-1252 style) while the umlaut literals use this
    // file's own encoding -- confirm the output encoding callers expect.
    $table = array(
        "'<script[^>]*?>.*?</script>'si"    => "",          // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"             => "",          // strip out html tags
        "'([\r\n])[\s]+'"                   => "\\1",       // strip out white space
        "'&(quot|#34|#034|#x22);'i"         => "\"",        // replace html entities
        "'&(lt|#60|#060|#x3c);'i"           => "<",
        "'&(gt|#62|#062|#x3e);'i"           => ">",
        "'&(nbsp|#160|#xa0);'i"             => " ",
        "'&(iexcl|#161);'i"                 => chr(161),
        "'&(cent|#162);'i"                  => chr(162),
        "'&(pound|#163);'i"                 => chr(163),
        "'&(copy|#169);'i"                  => chr(169),
        "'&(reg|#174);'i"                   => chr(174),
        "'&(deg|#176);'i"                   => chr(176),
        "'&(#39|#039|#x27);'"               => chr(39),
        "'&(euro|#8364);'i"                 => chr(128),    // europe
        "'&a(uml|UML);'"                    => "ä",         // german
        "'&o(uml|UML);'"                    => "ö",
        "'&u(uml|UML);'"                    => "ü",
        "'&A(uml|UML);'"                    => "Ä",
        "'&O(uml|UML);'"                    => "Ö",
        "'&U(uml|UML);'"                    => "Ü",         // was the mojibake "Ãœ"
        "'&szlig;'i"                        => "ß",
        "'&(amp|#38|#038|#x26);'i"          => "&",         // must stay last (see above)
    );

    $text = preg_replace(array_keys($table), array_values($table), $document);

    return $text;
}
743
744 /*======================================================================*\
745         Function:       _expandlinks
746         Purpose:        expand each link into a fully qualified URL
747         Input:          $links                  the links to qualify
748                                 $URI                    the full URI to get the base from
749         Output:         $expandedLinks  the expanded links
750 \*======================================================================*/
751
function _expandlinks($links,$URI)
{
    // Base = $URI without its query string, without a trailing
    // "/name.ext" segment and without a trailing slash.
    preg_match("/^[^\?]+/",$URI,$match);
    $base = isset($match[0]) ? $match[0] : "";

    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$base);
    $base = preg_replace("|/$|","",$base);

    // Root = scheme://host of the base; tolerate URIs parse_url() cannot split.
    $base_part = parse_url($base);
    $scheme = (is_array($base_part) && isset($base_part["scheme"])) ? $base_part["scheme"] : "";
    $host   = (is_array($base_part) && isset($base_part["host"]))   ? $base_part["host"]   : "";
    $base_root = $scheme."://".$host;

    // The replacements are applied in order to every link:
    //   1. drop an "http://<current host>" prefix, leaving a root-relative path
    //   2. root-relative links get scheme://host prepended
    //   3. anything still not absolute (and not mailto:) is made relative to
    //      the base; https:// now counts as absolute as well, so https links
    //      and pages fetched over https no longer get the base prepended twice
    //   4. collapse "/./"
    //   5. collapse "/dir/../"
    $search = array(        "|^http://".preg_quote($this->host, "|")."|i",
                            "|^(\/)|i",
                            "|^(?!https?://)(?!mailto:)|i",
                            "|/\./|",
                            "|/[^\/]+/\.\./|"
                    );

    $replace = array(       "",
                            $base_root."/",
                            $base."/",
                            "/",
                            "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
781
782 /*======================================================================*\
783         Function:       _httprequest
784         Purpose:        go get the http data from the server
785         Input:          $url            the url to fetch
786                                 $fp                     the current open file pointer
787                                 $URI            the full URI
788                                 $body           body contents to send if any (POST)
789         Output:
790 \*======================================================================*/
791
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Sends one HTTP request over the already-open stream $fp and reads the
    // response. Populates $this->headers, $this->status, $this->response_code,
    // $this->_redirectaddr, $this->_frameurls and $this->results.
    // Returns true on success, false (status -100) when a read timed out.

    $cookie_headers = '';
    // On a redirect hop, pick up cookies set by the previous response first.
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";

    // ---- build the request head ----
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // A Host entry supplied through rawheaders replaces the generated one.
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element. The former each() loop
        // never reset the array pointer, so raw headers were silently dropped
        // on every request after the first (e.g. when following a redirect);
        // each() is also unavailable in PHP 8.
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line terminates the request head
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    // ---- read the response head ----
    $this->_redirectaddr = false;
    // Start from an empty list (rather than an unset property) so later
    // count()/iteration over $this->headers is always safe.
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line ends the header section
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, so "Location:/x" works too.
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: remember the numeric code and the raw line
        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // ---- read the response body until fread() yields nothing more ----
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
945
946 /*======================================================================*\
947         Function:       _httpsrequest
948         Purpose:        go get the https data from the server using curl
949         Input:          $url            the url to fetch
950                                 $URI            the full URI
951                                 $body           body contents to send if any (POST)
952         Output:
953 \*======================================================================*/
954
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Fetches $URI by running the external curl binary ($this->curl_path).
    // Response headers are dumped by curl into a temporary file and parsed
    // afterwards. Populates $this->headers, $this->status,
    // $this->response_code, $this->_redirectaddr, $this->_frameurls and
    // $this->results. Returns true on success; false with $this->error set
    // when curl (or the temp file) fails.
    // $url and $http_method are not passed to curl: curl derives the request
    // line from $URI and from the presence of a -d body.

    // On a redirect hop, pick up cookies set by the previous response first.
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): always starts at the first element
        // and works on PHP 8, where each() no longer exists.
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // ---- build the curl command line ----
    // Every value is passed as its own escapeshellarg()-quoted argument.
    // Escaping the assembled command with escapeshellcmd() both mangled
    // legitimate URLs (backslashes before "?" and "&" inside the quotes)
    // and did not reliably confine header/body/URI text to one argument.
    $cmdline_params = '';
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // Directory for the header dump: the instance's temp_dir when it has a
    // non-empty one, otherwise the system temp directory.
    $temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($temp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    // curl_path is configuration rather than request data, but keep it from
    // smuggling shell metacharacters anyway.
    $command = escapeshellcmd($this->curl_path)
             . " -D ".escapeshellarg($headerfile)
             . $cmdline_params
             . " ".escapeshellarg($URI);

    $output = array();
    exec($command,$output,$return);

    if($return)
    {
        // do not leave the header dump behind on failure
        @unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n",$output);

    // read the header dump, then remove it right away
    $result_headers = file($headerfile);
    @unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // Start from an empty list (rather than an unset property) so later
    // count()/iteration over $this->headers is always safe.
    $this->headers = array();

    foreach($result_headers as $headerLine)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // (The former second pattern demanded extra whitespace after
        // "Location: " and was case-sensitive, so it never matched an
        // ordinary Location header.)
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($headerLine),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: remember the numeric code (as the plain-HTTP path
        // does) and the raw line
        if(preg_match("|^HTTP/|",$headerLine))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$headerLine,$status))
                $this->status = $status[1];
            $this->response_code = $headerLine;
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
1094
1095 /*======================================================================*\
1096         Function:       setcookies()
1097         Purpose:        set cookies for a redirection
1098 \*======================================================================*/
1099
function setcookies()
{
    // Walk the stored response headers and record the name/value pair of
    // every Set-Cookie line in $this->cookies (value is URL-decoded).
    $idx = 0;
    while ($idx < count($this->headers)) {
        $isCookie = preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$idx], $match);
        if ($isCookie) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
        $idx++;
    }
}
1108
1109
1110 /*======================================================================*\
1111         Function:       _check_timeout
1112         Purpose:        checks whether timeout has occurred
1113         Input:          $fp     file pointer
1114 \*======================================================================*/
1115
function _check_timeout($fp)
{
    // With no read timeout configured there is nothing to check.
    if (!($this->read_timeout > 0)) {
        return false;
    }

    // Ask the stream whether its last read timed out.
    $meta = socket_get_status($fp);
    if (!$meta["timed_out"]) {
        return false;
    }

    // Remember the timeout on the instance and report it.
    $this->timed_out = true;
    return true;
}
1127
1128 /*======================================================================*\
1129         Function:       _connect
1130         Purpose:        make a socket connection
1131         Input:          $fp     file pointer
1132 \*======================================================================*/
1133
/**
 * Open the socket used for a request.
 *
 * When both $this->proxy_host and $this->proxy_port are set, the connection
 * goes to the proxy and $this->_isproxy is set to true; otherwise it goes to
 * $this->host on $this->port. $this->status is reset to 0 before connecting.
 * On failure $this->status receives the fsockopen() error number and
 * $this->error a message describing it.
 *
 * @param resource $fp receives the result of fsockopen() (passed by reference)
 * @return bool true when the connection succeeded, false otherwise
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno) {
        // each case must break: without it every branch falls through to
        // "default" and the specific message is lost
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
            break;
    }

    return false;
}
1181 /*======================================================================*\
1182         Function:       _disconnect
1183         Purpose:        disconnect a socket connection
1184         Input:          $fp     file pointer
1185 \*======================================================================*/
1186
/**
 * Close a connection that was opened for a request.
 *
 * @param resource $fp stream to close
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
1191
1192
1193 /*======================================================================*\
1194         Function:       _prepare_post_body
1195         Purpose:        Prepare post body according to encoding type
1196         Input:          $formvars  - form variables
1197                                 $formfiles - form upload files
1198         Output:         post body
1199 \*======================================================================*/
1200
/**
 * Build the request body for a form submission.
 *
 * The encoding is selected by $this->_submit_type:
 *  - "application/x-www-form-urlencoded": name=value pairs, each followed by
 *    "&"; array or object values are sent as name[]=value.
 *  - "multipart/form-data": one MIME part per variable and per readable
 *    file, closed by a final boundary line. A new boundary is generated and
 *    stored in $this->_mime_boundary.
 * Any other submit type produces an empty body.
 *
 * @param mixed $formvars  form variables (name => value, or name => list of values)
 * @param mixed $formfiles upload files (field name => path, or field name => list of paths)
 * @return string the encoded post body; "" when there is nothing to send
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    // nothing to encode: hand back an empty string rather than null so
    // callers can always treat the result as a string
    if (count($formvars) == 0 && count($formfiles) == 0) {
        return $postdata;
    }

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach replaces each(), which no longer exists in PHP 8
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // the cast lets objects be walked like arrays on every PHP version
                    foreach ((array) $val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else {
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ((array) $val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // concatenate the "[]" suffix: writing "\[\]" inside a
                        // double-quoted string would emit the backslashes too
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) {
                        continue;
                    }

                    // read each file into a fresh variable so one upload never
                    // carries the bytes of a previous one; file_get_contents()
                    // also copes with zero-length files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) {
                        continue;
                    }
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
1265 }
1266 endif;
1267
1268 ?>