Wordpress 3.6
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2
3 /**
4  * Deprecated. Use WP_HTTP (http.php, class-http.php) instead.
5  */
6 _deprecated_file( basename( __FILE__ ), '3.0', WPINC . '/http.php' );
7
8 if ( !class_exists( 'Snoopy' ) ) :
9 /*************************************************
10
11 Snoopy - the PHP net client
12 Author: Monte Ohrt <monte@ispi.net>
13 Copyright (c): 1999-2008 New Digital Group, all rights reserved
14 Version: 1.2.4
15
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29
30 You may contact the author of Snoopy by e-mail at:
31 monte@ohrt.com
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.net/
35
36 *************************************************/
37
38 class Snoopy
39 {
40         /**** Public variables ****/
41
42         /* user definable vars */
43
44         var $host                       =       "www.php.net";          // host name we are connecting to
45         var $port                       =       80;                                     // port we are connecting to
46         var $proxy_host         =       "";                                     // proxy host to use
47         var $proxy_port         =       "";                                     // proxy port to use
48         var $proxy_user         =       "";                                     // proxy user to use
49         var $proxy_pass         =       "";                                     // proxy password to use
50
51         var $agent                      =       "Snoopy v1.2.4";        // agent we masquerade as
52         var     $referer                =       "";                                     // referer info to pass
53         var $cookies            =       array();                        // array of cookies to pass
54                                                                                                 // $cookies["username"]="joe";
55         var     $rawheaders             =       array();                        // array of raw headers to send
56                                                                                                 // $rawheaders["Content-type"]="text/html";
57
58         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
59         var $lastredirectaddr   =       "";                             // contains address of last redirected address
60         var     $offsiteok              =       true;                           // allows redirection off-site
61         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
62         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
63                                                                                                 // this only applies to fetchlinks()
64                                                                                                 // submitlinks(), and submittext()
65         var $passcookies        =       true;                           // pass set cookies back through redirects
66                                                                                                 // NOTE: this currently does not respect
67                                                                                                 // dates, domains or paths.
68
69         var     $user                   =       "";                                     // user for http authentication
70         var     $pass                   =       "";                                     // password for http authentication
71
72         // http accept types
73         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
74
75         var $results            =       "";                                     // where the content is put
76
77         var $error                      =       "";                                     // error messages sent here
78         var     $response_code  =       "";                                     // response code returned from server
79         var     $headers                =       array();                        // headers returned from server sent here
80         var     $maxlength              =       500000;                         // max return data length (body)
81         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
82                                                                                                 // supported only since PHP 4 Beta 4
83                                                                                                 // set to 0 to disallow timeouts
84         var $timed_out          =       false;                          // if a read operation timed out
85         var     $status                 =       0;                                      // http request status
86
87         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
88                                                                                                 // has permission to write to.
89                                                                                                 // under Windows, this should be C:\temp
90
91         var     $curl_path              =       "/usr/local/bin/curl";
92                                                                                                 // Snoopy will use cURL for fetching
93                                                                                                 // SSL content if a full system path to
94                                                                                                 // the cURL binary is supplied here.
95                                                                                                 // set to false if you do not have
96                                                                                                 // cURL installed. See http://curl.haxx.se
97                                                                                                 // for details on installing cURL.
98                                                                                                 // Snoopy does *not* use the cURL
99                                                                                                 // library functions built into php,
100                                                                                                 // as these functions are not stable
101                                                                                                 // as of this Snoopy release.
102
103         /**** Private variables ****/
104
105         var     $_maxlinelen    =       4096;                           // max line length (headers)
106
107         var $_httpmethod        =       "GET";                          // default http request method
108         var $_httpversion       =       "HTTP/1.0";                     // default http request version
109         var $_submit_method     =       "POST";                         // default submit method
110         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
111         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
112         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
113         var $_redirectdepth     =       0;                                      // increments on an http redirect
114         var $_frameurls         =       array();                        // frame src urls
115         var $_framedepth        =       0;                                      // increments on frame depth
116
117         var $_isproxy           =       false;                          // set if using a proxy server
118         var $_fp_timeout        =       30;                                     // timeout for socket connection
119
120 /*======================================================================*\
121         Function:       fetch
122         Purpose:        fetch the contents of a web page
123                                 (and possibly other protocols in the
124                                 future like ftp, nntp, gopher, etc.)
125         Input:          $URI    the location of the page to fetch
126         Output:         $this->results  the output text from the fetch
127 \*======================================================================*/
128
129         function fetch($URI)
130         {
131
132                 //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
133                 $URI_PARTS = parse_url($URI);
134                 if (!empty($URI_PARTS["user"]))
135                         $this->user = $URI_PARTS["user"];
136                 if (!empty($URI_PARTS["pass"]))
137                         $this->pass = $URI_PARTS["pass"];
138                 if (empty($URI_PARTS["query"]))
139                         $URI_PARTS["query"] = '';
140                 if (empty($URI_PARTS["path"]))
141                         $URI_PARTS["path"] = '';
142
143                 switch(strtolower($URI_PARTS["scheme"]))
144                 {
145                         case "http":
146                                 $this->host = $URI_PARTS["host"];
147                                 if(!empty($URI_PARTS["port"]))
148                                         $this->port = $URI_PARTS["port"];
149                                 if($this->_connect($fp))
150                                 {
151                                         if($this->_isproxy)
152                                         {
153                                                 // using proxy, send entire URI
154                                                 $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
155                                         }
156                                         else
157                                         {
158                                                 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
159                                                 // no proxy, send only the path
160                                                 $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
161                                         }
162
163                                         $this->_disconnect($fp);
164
165                                         if($this->_redirectaddr)
166                                         {
167                                                 /* url was redirected, check if we've hit the max depth */
168                                                 if($this->maxredirs > $this->_redirectdepth)
169                                                 {
170                                                         // only follow redirect if it's on this site, or offsiteok is true
171                                                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
172                                                         {
173                                                                 /* follow the redirect */
174                                                                 $this->_redirectdepth++;
175                                                                 $this->lastredirectaddr=$this->_redirectaddr;
176                                                                 $this->fetch($this->_redirectaddr);
177                                                         }
178                                                 }
179                                         }
180
181                                         if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
182                                         {
183                                                 $frameurls = $this->_frameurls;
184                                                 $this->_frameurls = array();
185
186                                                 while(list(,$frameurl) = each($frameurls))
187                                                 {
188                                                         if($this->_framedepth < $this->maxframes)
189                                                         {
190                                                                 $this->fetch($frameurl);
191                                                                 $this->_framedepth++;
192                                                         }
193                                                         else
194                                                                 break;
195                                                 }
196                                         }
197                                 }
198                                 else
199                                 {
200                                         return false;
201                                 }
202                                 return true;
203                                 break;
204                         case "https":
205                                 if(!$this->curl_path)
206                                         return false;
207                                 if(function_exists("is_executable"))
208                                     if (!is_executable($this->curl_path))
209                                         return false;
210                                 $this->host = $URI_PARTS["host"];
211                                 if(!empty($URI_PARTS["port"]))
212                                         $this->port = $URI_PARTS["port"];
213                                 if($this->_isproxy)
214                                 {
215                                         // using proxy, send entire URI
216                                         $this->_httpsrequest($URI,$URI,$this->_httpmethod);
217                                 }
218                                 else
219                                 {
220                                         $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
221                                         // no proxy, send only the path
222                                         $this->_httpsrequest($path, $URI, $this->_httpmethod);
223                                 }
224
225                                 if($this->_redirectaddr)
226                                 {
227                                         /* url was redirected, check if we've hit the max depth */
228                                         if($this->maxredirs > $this->_redirectdepth)
229                                         {
230                                                 // only follow redirect if it's on this site, or offsiteok is true
231                                                 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
232                                                 {
233                                                         /* follow the redirect */
234                                                         $this->_redirectdepth++;
235                                                         $this->lastredirectaddr=$this->_redirectaddr;
236                                                         $this->fetch($this->_redirectaddr);
237                                                 }
238                                         }
239                                 }
240
241                                 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
242                                 {
243                                         $frameurls = $this->_frameurls;
244                                         $this->_frameurls = array();
245
246                                         while(list(,$frameurl) = each($frameurls))
247                                         {
248                                                 if($this->_framedepth < $this->maxframes)
249                                                 {
250                                                         $this->fetch($frameurl);
251                                                         $this->_framedepth++;
252                                                 }
253                                                 else
254                                                         break;
255                                         }
256                                 }
257                                 return true;
258                                 break;
259                         default:
260                                 // not a valid protocol
261                                 $this->error    =       'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
262                                 return false;
263                                 break;
264                 }
265                 return true;
266         }
267
268 /*======================================================================*\
269         Function:       submit
270         Purpose:        submit an http form
271         Input:          $URI    the location to post the data
272                                 $formvars       the formvars to use.
273                                         format: $formvars["var"] = "val";
274                                 $formfiles  an array of files to submit
275                                         format: $formfiles["var"] = "/dir/filename.ext";
276         Output:         $this->results  the text output from the post
277 \*======================================================================*/
278
279         function submit($URI, $formvars="", $formfiles="")
280         {
281                 unset($postdata);
282
283                 $postdata = $this->_prepare_post_body($formvars, $formfiles);
284
285                 $URI_PARTS = parse_url($URI);
286                 if (!empty($URI_PARTS["user"]))
287                         $this->user = $URI_PARTS["user"];
288                 if (!empty($URI_PARTS["pass"]))
289                         $this->pass = $URI_PARTS["pass"];
290                 if (empty($URI_PARTS["query"]))
291                         $URI_PARTS["query"] = '';
292                 if (empty($URI_PARTS["path"]))
293                         $URI_PARTS["path"] = '';
294
295                 switch(strtolower($URI_PARTS["scheme"]))
296                 {
297                         case "http":
298                                 $this->host = $URI_PARTS["host"];
299                                 if(!empty($URI_PARTS["port"]))
300                                         $this->port = $URI_PARTS["port"];
301                                 if($this->_connect($fp))
302                                 {
303                                         if($this->_isproxy)
304                                         {
305                                                 // using proxy, send entire URI
306                                                 $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
307                                         }
308                                         else
309                                         {
310                                                 $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
311                                                 // no proxy, send only the path
312                                                 $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
313                                         }
314
315                                         $this->_disconnect($fp);
316
317                                         if($this->_redirectaddr)
318                                         {
319                                                 /* url was redirected, check if we've hit the max depth */
320                                                 if($this->maxredirs > $this->_redirectdepth)
321                                                 {
322                                                         if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
323                                                                 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
324
325                                                         // only follow redirect if it's on this site, or offsiteok is true
326                                                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
327                                                         {
328                                                                 /* follow the redirect */
329                                                                 $this->_redirectdepth++;
330                                                                 $this->lastredirectaddr=$this->_redirectaddr;
331                                                                 if( strpos( $this->_redirectaddr, "?" ) > 0 )
332                                                                         $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
333                                                                 else
334                                                                         $this->submit($this->_redirectaddr,$formvars, $formfiles);
335                                                         }
336                                                 }
337                                         }
338
339                                         if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
340                                         {
341                                                 $frameurls = $this->_frameurls;
342                                                 $this->_frameurls = array();
343
344                                                 while(list(,$frameurl) = each($frameurls))
345                                                 {
346                                                         if($this->_framedepth < $this->maxframes)
347                                                         {
348                                                                 $this->fetch($frameurl);
349                                                                 $this->_framedepth++;
350                                                         }
351                                                         else
352                                                                 break;
353                                                 }
354                                         }
355
356                                 }
357                                 else
358                                 {
359                                         return false;
360                                 }
361                                 return true;
362                                 break;
363                         case "https":
364                                 if(!$this->curl_path)
365                                         return false;
366                                 if(function_exists("is_executable"))
367                                     if (!is_executable($this->curl_path))
368                                         return false;
369                                 $this->host = $URI_PARTS["host"];
370                                 if(!empty($URI_PARTS["port"]))
371                                         $this->port = $URI_PARTS["port"];
372                                 if($this->_isproxy)
373                                 {
374                                         // using proxy, send entire URI
375                                         $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
376                                 }
377                                 else
378                                 {
379                                         $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
380                                         // no proxy, send only the path
381                                         $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
382                                 }
383
384                                 if($this->_redirectaddr)
385                                 {
386                                         /* url was redirected, check if we've hit the max depth */
387                                         if($this->maxredirs > $this->_redirectdepth)
388                                         {
389                                                 if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
390                                                         $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
391
392                                                 // only follow redirect if it's on this site, or offsiteok is true
393                                                 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
394                                                 {
395                                                         /* follow the redirect */
396                                                         $this->_redirectdepth++;
397                                                         $this->lastredirectaddr=$this->_redirectaddr;
398                                                         if( strpos( $this->_redirectaddr, "?" ) > 0 )
399                                                                 $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
400                                                         else
401                                                                 $this->submit($this->_redirectaddr,$formvars, $formfiles);
402                                                 }
403                                         }
404                                 }
405
406                                 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
407                                 {
408                                         $frameurls = $this->_frameurls;
409                                         $this->_frameurls = array();
410
411                                         while(list(,$frameurl) = each($frameurls))
412                                         {
413                                                 if($this->_framedepth < $this->maxframes)
414                                                 {
415                                                         $this->fetch($frameurl);
416                                                         $this->_framedepth++;
417                                                 }
418                                                 else
419                                                         break;
420                                         }
421                                 }
422                                 return true;
423                                 break;
424
425                         default:
426                                 // not a valid protocol
427                                 $this->error    =       'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
428                                 return false;
429                                 break;
430                 }
431                 return true;
432         }
433
434 /*======================================================================*\
435         Function:       fetchlinks
436         Purpose:        fetch the links from a web page
437         Input:          $URI    where you are fetching from
438         Output:         $this->results  an array of the URLs
439 \*======================================================================*/
440
441         function fetchlinks($URI)
442         {
443                 if ($this->fetch($URI))
444                 {
445                         if($this->lastredirectaddr)
446                                 $URI = $this->lastredirectaddr;
447                         if(is_array($this->results))
448                         {
449                                 for($x=0;$x<count($this->results);$x++)
450                                         $this->results[$x] = $this->_striplinks($this->results[$x]);
451                         }
452                         else
453                                 $this->results = $this->_striplinks($this->results);
454
455                         if($this->expandlinks)
456                                 $this->results = $this->_expandlinks($this->results, $URI);
457                         return true;
458                 }
459                 else
460                         return false;
461         }
462
463 /*======================================================================*\
464         Function:       fetchform
465         Purpose:        fetch the form elements from a web page
466         Input:          $URI    where you are fetching from
467         Output:         $this->results  the resulting html form
468 \*======================================================================*/
469
470         function fetchform($URI)
471         {
472
473                 if ($this->fetch($URI))
474                 {
475
476                         if(is_array($this->results))
477                         {
478                                 for($x=0;$x<count($this->results);$x++)
479                                         $this->results[$x] = $this->_stripform($this->results[$x]);
480                         }
481                         else
482                                 $this->results = $this->_stripform($this->results);
483
484                         return true;
485                 }
486                 else
487                         return false;
488         }
489
490
491 /*======================================================================*\
492         Function:       fetchtext
493         Purpose:        fetch the text from a web page, stripping the links
494         Input:          $URI    where you are fetching from
495         Output:         $this->results  the text from the web page
496 \*======================================================================*/
497
function fetchtext($URI)
{
    // Fetch $URI and replace $this->results with its plain-text rendering
    // (see _striptext). Returns true on success, false if the fetch failed.
    if (!$this->fetch($URI)) {
        return false;
    }

    if (!is_array($this->results)) {
        // only one document to convert
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // array of documents: convert every entry in place
    $slot = 0;
    while ($slot < count($this->results)) {
        $this->results[$slot] = $this->_striptext($this->results[$slot]);
        $slot++;
    }

    return true;
}
514
515 /*======================================================================*\
516         Function:       submitlinks
517         Purpose:        grab links from a form submission
518         Input:          $URI    where you are submitting from
519         Output:         $this->results  an array of the links from the post
520 \*======================================================================*/
521
function submitlinks($URI, $formvars="", $formfiles="")
{
    // Submit a form to $URI and replace $this->results with the links found
    // in the response. Returns true on success, false if the submit failed.
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // after a redirect, links are expanded against the redirect target
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        // single response document
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    // several response documents: handle each slot on its own
    $slot = 0;
    while ($slot < count($this->results)) {
        $this->results[$slot] = $this->_striplinks($this->results[$slot]);
        if ($this->expandlinks) {
            $this->results[$slot] = $this->_expandlinks($this->results[$slot], $URI);
        }
        $slot++;
    }

    return true;
}
548
549 /*======================================================================*\
550         Function:       submittext
551         Purpose:        grab text from a form submission
552         Input:          $URI    where you are submitting from
553         Output:         $this->results  the text from the web page
554 \*======================================================================*/
555
function submittext($URI, $formvars = "", $formfiles = "")
{
    // Submit a form to $URI and replace $this->results with the plain text
    // of the response. Returns true on success, false if the submit failed.
    if (!$this->submit($URI, $formvars, $formfiles)) {
        return false;
    }

    // after a redirect, the redirect target becomes the base for expansion
    if ($this->lastredirectaddr) {
        $URI = $this->lastredirectaddr;
    }

    if (!is_array($this->results)) {
        // single response document
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks) {
            $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    // several response documents: handle each slot on its own
    $slot = 0;
    while ($slot < count($this->results)) {
        $this->results[$slot] = $this->_striptext($this->results[$slot]);
        if ($this->expandlinks) {
            $this->results[$slot] = $this->_expandlinks($this->results[$slot], $URI);
        }
        $slot++;
    }

    return true;
}
582
583
584
585 /*======================================================================*\
586         Function:       set_submit_multipart
587         Purpose:        Set the form submission content type to
588                                 multipart/form-data
589 \*======================================================================*/
function set_submit_multipart()
{
    // Select multipart/form-data as the content type for form submissions.
    $type = "multipart/form-data";
    $this->_submit_type = $type;
}
594
595
596 /*======================================================================*\
597         Function:       set_submit_normal
598         Purpose:        Set the form submission content type to
599                                 application/x-www-form-urlencoded
600 \*======================================================================*/
function set_submit_normal()
{
    // Select URL-encoded form data as the content type for form submissions.
    $type = "application/x-www-form-urlencoded";
    $this->_submit_type = $type;
}
605
606
607
608
609 /*======================================================================*\
610         Private functions
611 \*======================================================================*/
612
613
614 /*======================================================================*\
615         Function:       _striplinks
616         Purpose:        strip the hyperlinks from an html document
617         Input:          $document       document to strip.
618         Output:         $match          an array of the links
619 \*======================================================================*/
620
function _striplinks($document)
{
    // Extract the href value of every <a> tag in $document.
    //   $document  HTML text to scan
    // Returns an array of link targets; it is empty when the document has
    // no links. Quoted hrefs come first, followed by unquoted ones, which is
    // the order this method has always produced.
    $found = preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx", $document, $links);

    // Always hand back an array, even when nothing matched or PCRE failed.
    $match = array();
    if (!$found) {
        return $match;
    }

    // Group 2 holds quoted values, group 3 unquoted ones; for each anchor
    // exactly one of them is non-empty. foreach replaces the each() loops,
    // which no longer exist in PHP 8.
    foreach (array($links[2], $links[3]) as $group) {
        foreach ($group as $val) {
            if (!empty($val)) {
                $match[] = $val;
            }
        }
    }

    return $match;
}
647
648 /*======================================================================*\
649         Function:       _stripform
650         Purpose:        strip the form elements from an html document
651         Input:          $document       document to strip.
        Output:         $match          a string holding the matched form tags, joined by CRLF
653 \*======================================================================*/
654
function _stripform($document)
{
    // Pull the form-related tags (FORM, INPUT, SELECT, TEXTAREA, OPTION) out
    // of $document. For OPTION tags the text up to the next option/select
    // tag is kept as well. Returns the matches as one string, one match per
    // CRLF-separated line; the string is empty when nothing matched.
    $found = array();
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $found);

    // index 0 carries the full text of every match
    return implode("\r\n", $found[0]);
}
665
666
667
668 /*======================================================================*\
669         Function:       _striptext
670         Purpose:        strip the text from an html document
671         Input:          $document       document to strip.
672         Output:         $text           the resulting text
673 \*======================================================================*/
674
function _striptext($document)
{
    // Convert an HTML document to plain text: scripts and tags are removed,
    // whitespace following a line break is collapsed, and a fixed list of
    // common entities is replaced by single-byte characters.
    //   $document  HTML text to convert
    // Returns the resulting text.

    // Each rule is (pattern, replacement); rules are applied in this order.
    $rules = array(
        array("'<script[^>]*?>.*?</script>'si", ""),      // strip out javascript
        array("'<[\/\!]*?[^<>]*?>'si",          ""),      // strip out html tags
        array("'([\r\n])[\s]+'",                "\\1"),   // strip out white space
        array("'&(quot|#34|#034|#x22);'i",      "\""),    // html entities, incl. hexadecimal forms
        array("'&(amp|#38|#038|#x26);'i",       "&"),
        array("'&(lt|#60|#060|#x3c);'i",        "<"),
        array("'&(gt|#62|#062|#x3e);'i",        ">"),
        array("'&(nbsp|#160|#xa0);'i",          " "),
        array("'&(iexcl|#161);'i",              chr(161)),
        array("'&(cent|#162);'i",               chr(162)),
        array("'&(pound|#163);'i",              chr(163)),
        array("'&(copy|#169);'i",               chr(169)),
        array("'&(reg|#174);'i",                chr(174)),
        array("'&(deg|#176);'i",                chr(176)),
        array("'&(#39|#039|#x27);'",            chr(39)),
        array("'&(euro|#8364);'i",              chr(128)),  // europe
        array("'&a(uml|UML);'",                 chr(0xE4)), // german: ANSI &auml;
        array("'&o(uml|UML);'",                 chr(0xF6)), // ANSI &ouml;
        array("'&u(uml|UML);'",                 chr(0xFC)), // ANSI &uuml;
        array("'&A(uml|UML);'",                 chr(0xC4)), // ANSI &Auml;
        array("'&O(uml|UML);'",                 chr(0xD6)), // ANSI &Ouml;
        array("'&U(uml|UML);'",                 chr(0xDC)), // ANSI &Uuml;
        array("'&szlig;'i",                     chr(0xDF)), // ANSI &szlig;
    );

    // split the table into the parallel arrays preg_replace expects
    $patterns = array();
    $substitutes = array();
    foreach ($rules as $rule) {
        $patterns[] = $rule[0];
        $substitutes[] = $rule[1];
    }

    return preg_replace($patterns, $substitutes, $document);
}
735
736 /*======================================================================*\
737         Function:       _expandlinks
738         Purpose:        expand each link into a fully qualified URL
739         Input:          $links                  the links to qualify
740                                 $URI                    the full URI to get the base from
741         Output:         $expandedLinks  the expanded links
742 \*======================================================================*/
743
function _expandlinks($links,$URI)
{
    // Turn relative links into fully qualified URLs, using $URI as the base.
    //   $links  a link, or an array of links (used as the preg_replace subject)
    //   $URI    full URI of the document the links were taken from
    // Returns the rewritten link(s) in the same shape as $links.

    // drop the query string from the base URI
    preg_match("/^[^\?]+/",$URI,$match);

    // Drop a trailing "/file.ext" so only the directory part remains. The
    // lookbehind keeps the "//" in front of the host intact; without it a
    // bare "http://example.com" was cut down to "http:".
    $match = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
    $match = preg_replace("|/$|","",$match);

    // scheme://host of the base, used for root-relative links
    $match_part = parse_url($match);
    $scheme = isset($match_part["scheme"]) ? $match_part["scheme"] : "";
    $host = isset($match_part["host"]) ? $match_part["host"] : "";
    $match_root = $scheme."://".$host;

    // The rules run in order on each link:
    //  1. strip "http://<our host>" so the link becomes root-relative
    //  2. root-relative links get scheme://host of the base
    //  3. anything that is not already absolute (http, https, mailto) gets
    //     the base directory; https is included so absolute https links and
    //     links resolved against an https base are not prefixed twice
    //  4. collapse "/./"
    //  5. collapse "/dir/../"
    $search = array(
        "|^http://".preg_quote($this->host, "|")."|i",
        "|^(\/)|i",
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|"
    );

    $replace = array(
        "",
        $match_root."/",
        $match."/",
        "/",
        "/"
    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
773
774 /*======================================================================*\
775         Function:       _httprequest
776         Purpose:        go get the http data from the server
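        Input:          $url            the url to fetch
                                $fp                     the current open file pointer
                                $URI            the full URI
                                $http_method    the HTTP request method
                                $content_type   value for the Content-type header, if any
                                $body           body contents to send if any (POST)
        Output:         true once the response has been read, false on a read
                                timeout; the response is stored in $this->results
                                and $this->headers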
782 \*======================================================================*/
783
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Send one HTTP request over the already-open stream $fp and read the reply.
    //   $url           request target written on the request line ("/" if empty)
    //   $fp            open stream to write the request to and read the reply from
    //   $URI           full URI; its scheme is used to rebuild relative redirects
    //   $http_method   request method written on the request line
    //   $content_type  optional Content-type value; for "multipart/form-data"
    //                  the MIME boundary is appended
    //   $body          optional request body
    // Returns true once the response is stored in $this->headers and
    // $this->results, false (with status -100) when a read timed out.
    // Redirect targets are left in $this->_redirectaddr, frame sources in
    // $this->_frameurls.
    $cookie_headers = '';
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // cut the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach rather than each(): each() is gone in PHP 8, and because
        // the array pointer was never reset the raw headers were silently
        // dropped on every request after the first (e.g. after a redirect).
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // start from an empty list (not an unset property) so later code can
    // always treat $this->headers as an array
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line (CRLF, or a bare LF from a sloppy server) ends the headers
        if(rtrim($currentHeader, "\r\n") === '')
            break;

        // if a header begins with Location: or URI:, set the redirect;
        // the space after the colon is optional
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            // numeric status is the token after the protocol version
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body until fread() returns nothing more
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
937
938 /*======================================================================*\
939         Function:       _httpsrequest
940         Purpose:        go get the https data from the server using curl
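        Input:          $url            the url to fetch
                                $URI            the full URI
                                $http_method    the HTTP request method
                                $content_type   value for the Content-type header, if any
                                $body           body contents to send if any (POST)
        Output:         true on success, false if curl exits with an error;
                                the response is stored in $this->results and
                                $this->headers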
945 \*======================================================================*/
946
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Fetch $URI over HTTPS by running the curl binary at $this->curl_path.
    //   $url           accepted for signature parity with _httprequest; it is
    //                  defaulted to "/" but not passed to curl
    //   $URI           full URI handed to curl; its scheme is also used to
    //                  rebuild relative redirects
    //   $http_method   not forwarded to curl; a non-empty $body is sent with -d
    //   $content_type  optional Content-type value; for "multipart/form-data"
    //                  the MIME boundary is appended
    //   $body          optional request body
    // Returns true on success, false (with $this->error set) when curl exits
    // with a non-zero status or no temporary file could be created. The
    // response ends up in $this->headers and $this->results, redirect targets
    // in $this->_redirectaddr and frame sources in $this->_frameurls.
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // cut the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach rather than each(): each() is gone in PHP 8 and never
        // rewound the array between requests
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Build the curl command line. Every value that can carry request data
    // (headers, body, URI, file name) goes through escapeshellarg() so that
    // quotes, backticks or $(...) in it cannot break out into the shell.
    $cmdline_params = '';
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // curl dumps the response headers into this file (-D)
    $headerfile = tempnam($this->temp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    $results = array();
    exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        // do not leave the temporary header file behind on failure
        if(is_file($headerfile))
            unlink($headerfile);
        return false;
    }

    // exec() delivers the output line by line; glue the body back together
    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // start from an empty list (not an unset property) so later code can
    // always treat $this->headers as an array
    $this->headers = array();

    foreach($result_headers as $header_line)
    {
        // If a header begins with Location: or URI:, set the redirect. One
        // case-insensitive pattern with optional whitespace replaces the old
        // pair of patterns, whose second one demanded two whitespace
        // characters after "Location:" and so never matched a normal header.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($header_line),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$header_line))
            $this->response_code = $header_line;

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    unlink($headerfile);

    return true;
}
1085
1086 /*======================================================================*\
1087         Function:       setcookies()
1088         Purpose:        set cookies for a redirection
1089 \*======================================================================*/
1090
function setcookies()
{
    // Copy every Set-Cookie header of the last response into $this->cookies
    // (name => url-decoded value) so the cookies are sent along when a
    // redirect is followed. Cookie attributes after the first ";" are ignored.

    // nothing to do when no response headers have been collected yet
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        // The value stops at ";" or at the line ending. Headers are stored
        // with their trailing CRLF, and without the \r\n exclusion the line
        // ending became part of the value of attribute-less cookies.
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match)) {
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
1099
1100
1101 /*======================================================================*\
1102         Function:       _check_timeout
1103         Purpose:        checks whether timeout has occurred
1104         Input:          $fp     file pointer
1105 \*======================================================================*/
1106
function _check_timeout($fp)
{
    // Report whether the stream $fp has hit its read timeout.
    //   $fp  file pointer of the open connection
    // Returns true (and sets $this->timed_out) when the stream status says
    // it timed out; false otherwise, including when no read timeout is set.
    if (!($this->read_timeout > 0)) {
        return false;
    }

    $stream_state = socket_get_status($fp);
    if (!$stream_state["timed_out"]) {
        return false;
    }

    $this->timed_out = true;
    return true;
}
1118
/*======================================================================*\
        Function:       _connect
        Purpose:        make a socket connection, either to the configured
                                proxy (when both proxy host and proxy port are set)
                                or directly to $this->host / $this->port
        Input:          $fp     file pointer (by reference; receives the socket)
        Output:         true on success, false on failure; on failure
                                $this->status holds the error number and
                                $this->error a readable message
\*======================================================================*/

        function _connect(&$fp)
        {
                if(!empty($this->proxy_host) && !empty($this->proxy_port))
                {
                        // both proxy settings are present: connect to the proxy instead
                        $this->_isproxy = true;

                        $host = $this->proxy_host;
                        $port = $this->proxy_port;
                }
                else
                {
                        $host = $this->host;
                        $port = $this->port;
                }

                $this->status = 0;

                $fp = fsockopen(
                                        $host,
                                        $port,
                                        $errno,
                                        $errstr,
                                        $this->_fp_timeout
                                        );

                if($fp)
                {
                        // socket connection succeeded
                        return true;
                }

                // socket connection failed
                $this->status = $errno;
                switch($errno)
                {
                        // each case must end in break; without it every case falls
                        // through and the generic default message always wins
                        case -3:
                                $this->error="socket creation failed (-3)";
                                break;
                        case -4:
                                $this->error="dns lookup failure (-4)";
                                break;
                        case -5:
                                $this->error="connection refused or timed out (-5)";
                                break;
                        default:
                                $this->error="connection failed (".$errno.")";
                                break;
                }
                return false;
        }
/*======================================================================*\
        Function:       _disconnect
        Purpose:        disconnect a socket connection
        Input:          $fp     file pointer
        Output:         the result of fclose() on $fp
\*======================================================================*/

        function _disconnect($fp)
        {
                $closed = fclose($fp);

                return $closed;
        }
1182
1183
/*======================================================================*\
        Function:       _prepare_post_body
        Purpose:        Prepare post body according to encoding type
                                ($this->_submit_type)
        Input:          $formvars  - form variables (name => value, where a
                                                         value may itself be a list of values)
                                $formfiles - form upload files (field name => file
                                                         path or list of file paths)
        Output:         post body as a string; nothing (null) when both
                                inputs are empty; an empty string when the submit
                                type is neither of the two handled below
        Note:           for multipart/form-data a new boundary is generated
                                and stored in $this->_mime_boundary
\*======================================================================*/

        function _prepare_post_body($formvars, $formfiles)
        {
                settype($formvars, "array");
                settype($formfiles, "array");
                $postdata = '';

                if (count($formvars) == 0 && count($formfiles) == 0)
                        return;

                switch ($this->_submit_type) {
                        case "application/x-www-form-urlencoded":
                                // foreach replaces each(), which was removed in PHP 8 and
                                // depended on the array's internal pointer position
                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                // multi-valued field: emit one "key[]=value" pair per value
                                                foreach ($val as $cur_val) {
                                                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                                                }
                                        } else
                                                $postdata .= urlencode($key)."=".urlencode($val)."&";
                                }
                                break;

                        case "multipart/form-data":
                                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                foreach ($val as $cur_val) {
                                                        $postdata .= "--".$this->_mime_boundary."\r\n";
                                                        // concatenate the brackets: "\[" is not an escape
                                                        // sequence in PHP, so writing them escaped inside the
                                                        // string would send literal backslashes in the name
                                                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                                                        $postdata .= "$cur_val\r\n";
                                                }
                                        } else {
                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                                                $postdata .= "$val\r\n";
                                        }
                                }

                                foreach ($formfiles as $field_name => $file_names) {
                                        settype($file_names, "array");
                                        foreach ($file_names as $file_name) {
                                                if (!is_readable($file_name)) continue;

                                                // file_get_contents copes with zero-length files, where
                                                // fread() with a length of 0 fails, and reports an
                                                // unreadable file by returning false
                                                $file_content = file_get_contents($file_name);
                                                if ($file_content === false) continue;

                                                $base_name = basename($file_name);

                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                                                $postdata .= "$file_content\r\n";
                                        }
                                }

                                // closing boundary terminates the multipart body
                                $postdata .= "--".$this->_mime_boundary."--\r\n";
                                break;
                }

                return $postdata;
        }
1254 }
1255 endif;
1256 ?>