]> scripts.mit.edu Git - autoinstalls/wordpress.git/blob - wp-includes/class-snoopy.php
Wordpress 2.8
[autoinstalls/wordpress.git] / wp-includes / class-snoopy.php
1 <?php
2 if ( !in_array('Snoopy', get_declared_classes() ) ) :
3 /*************************************************
4
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2008 New Digital Group, all rights reserved
8 Version: 1.2.4
9
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24 You may contact the author of Snoopy by e-mail at:
25 monte@ohrt.com
26
27 The latest version of Snoopy can be obtained from:
28 http://snoopy.sourceforge.net/
29
30 *************************************************/
31
32 class Snoopy
33 {
34         /**** Public variables ****/
35
36         /* user definable vars */
37
38         var $host                       =       "www.php.net";          // host name we are connecting to
39         var $port                       =       80;                                     // port we are connecting to
40         var $proxy_host         =       "";                                     // proxy host to use
41         var $proxy_port         =       "";                                     // proxy port to use
42         var $proxy_user         =       "";                                     // proxy user to use
43         var $proxy_pass         =       "";                                     // proxy password to use
44
45         var $agent                      =       "Snoopy v1.2.4";        // agent we masquerade as
46         var     $referer                =       "";                                     // referer info to pass
47         var $cookies            =       array();                        // array of cookies to pass
48                                                                                                 // $cookies["username"]="joe";
49         var     $rawheaders             =       array();                        // array of raw headers to send
50                                                                                                 // $rawheaders["Content-type"]="text/html";
51
52         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
53         var $lastredirectaddr   =       "";                             // contains address of last redirected address
54         var     $offsiteok              =       true;                           // allows redirection off-site
55         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
56         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
57                                                                                                 // this only applies to fetchlinks()
58                                                                                                 // submitlinks(), and submittext()
59         var $passcookies        =       true;                           // pass set cookies back through redirects
60                                                                                                 // NOTE: this currently does not respect
61                                                                                                 // dates, domains or paths.
62
63         var     $user                   =       "";                                     // user for http authentication
64         var     $pass                   =       "";                                     // password for http authentication
65
66         // http accept types
67         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
68
69         var $results            =       "";                                     // where the content is put
70
71         var $error                      =       "";                                     // error messages sent here
72         var     $response_code  =       "";                                     // response code returned from server
73         var     $headers                =       array();                        // headers returned from server sent here
74         var     $maxlength              =       500000;                         // max return data length (body)
75         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
76                                                                                                 // supported only since PHP 4 Beta 4
77                                                                                                 // set to 0 to disallow timeouts
78         var $timed_out          =       false;                          // if a read operation timed out
79         var     $status                 =       0;                                      // http request status
80
81         var $temp_dir           =       "/tmp";                         // temporary directory that the webserver
82                                                                                                 // has permission to write to.
83                                                                                                 // under Windows, this should be C:\temp
84
85         var     $curl_path              =       "/usr/local/bin/curl";
86                                                                                                 // Snoopy will use cURL for fetching
87                                                                                                 // SSL content if a full system path to
88                                                                                                 // the cURL binary is supplied here.
89                                                                                                 // set to false if you do not have
90                                                                                                 // cURL installed. See http://curl.haxx.se
91                                                                                                 // for details on installing cURL.
92                                                                                                 // Snoopy does *not* use the cURL
93                                                                                                 // library functions built into php,
94                                                                                                 // as these functions are not stable
95                                                                                                 // as of this Snoopy release.
96
97         /**** Private variables ****/
98
99         var     $_maxlinelen    =       4096;                           // max line length (headers)
100
101         var $_httpmethod        =       "GET";                          // default http request method
102         var $_httpversion       =       "HTTP/1.0";                     // default http request version
103         var $_submit_method     =       "POST";                         // default submit method
104         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
105         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
106         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
107         var $_redirectdepth     =       0;                                      // increments on an http redirect
108         var $_frameurls         =       array();                        // frame src urls
109         var $_framedepth        =       0;                                      // increments on frame depth
110
111         var $_isproxy           =       false;                          // set if using a proxy server
112         var $_fp_timeout        =       30;                                     // timeout for socket connection
113
114 /*======================================================================*\
115         Function:       fetch
116         Purpose:        fetch the contents of a web page
117                                 (and possibly other protocols in the
118                                 future like ftp, nntp, gopher, etc.)
119         Input:          $URI    the location of the page to fetch
120         Output:         $this->results  the output text from the fetch
121 \*======================================================================*/
122
function fetch($URI)
{
	/*
	 * Retrieve $URI over http (socket via _connect/_httprequest) or https
	 * (via _httpsrequest, which needs the external cURL binary), then follow
	 * a pending redirect and fetch any frame URLs left in $this->_frameurls.
	 *
	 * Input:   $URI  absolute http:// or https:// URL. A user/password
	 *                embedded in it overwrites $this->user / $this->pass.
	 * Returns: false when the scheme is not http/https ($this->error is set),
	 *          when _connect() fails, or when https is requested while
	 *          $this->curl_path is empty or not executable; true otherwise.
	 *          The results of nested redirect/frame fetches are ignored.
	 */

	//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed input
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	// normalise the optional components so later reads never hit an undefined index
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	if (empty($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (empty($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';

	switch (strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			// NOTE(review): the port is only overwritten when the URI names one,
			// so a port from an earlier URI stays in effect. Left unchanged
			// because callers may set $this->port themselves.
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// _isproxy is read only after _connect(), exactly as before;
			// presumably _connect() is what sets it -- confirm.
			if ($this->_isproxy)
				$target = $URI;		// using proxy, send entire URI
			else
				$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");	// no proxy, send only the path

			$this->_httprequest($target, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			if ($this->_isproxy)
				$target = $URI;		// using proxy, send entire URI
			else
				$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");	// no proxy, send only the path

			$this->_httpsrequest($target, $URI, $this->_httpmethod);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// only follow redirect if it's on this site, or offsiteok is true
		$follow = $this->offsiteok;
		if (!$follow)
		{
			// Compare the parsed host instead of a "^http://host" prefix match:
			// the prefix also matched "host.evil.example" and never matched
			// an https:// redirect to the very same host.
			$redirect_parts = parse_url($this->_redirectaddr);
			$follow = is_array($redirect_parts)
				&& !empty($redirect_parts["scheme"])
				&& !empty($redirect_parts["host"])
				&& in_array(strtolower($redirect_parts["scheme"]), array("http", "https"))
				&& strcasecmp($redirect_parts["host"], $this->host) == 0;
		}

		if ($follow)
		{
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch($this->_redirectaddr);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls may refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
261
262 /*======================================================================*\
263         Function:       submit
264         Purpose:        submit an http form
265         Input:          $URI    the location to post the data
266                                 $formvars       the formvars to use.
267                                         format: $formvars["var"] = "val";
268                                 $formfiles  an array of files to submit
269                                         format: $formfiles["var"] = "/dir/filename.ext";
270         Output:         $this->results  the text output from the post
271 \*======================================================================*/
272
function submit($URI, $formvars="", $formfiles="")
{
	/*
	 * Send a form submission to $URI over http or https using
	 * $this->_submit_method / $this->_submit_type, then follow a pending
	 * redirect and fetch any frame URLs left in $this->_frameurls.
	 *
	 * Input:   $URI        absolute http:// or https:// URL to submit to. A
	 *                      user/password embedded in it overwrites
	 *                      $this->user / $this->pass.
	 *          $formvars   form variables, handed to _prepare_post_body()
	 *          $formfiles  files to submit, handed to _prepare_post_body()
	 * Returns: false when the scheme is not http/https ($this->error is set),
	 *          when _connect() fails, or when https is requested while
	 *          $this->curl_path is empty or not executable; true otherwise.
	 *          The results of nested redirect/frame requests are ignored.
	 */

	$postdata = $this->_prepare_post_body($formvars, $formfiles);

	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed input
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	// normalise the optional components so later reads never hit an undefined index
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	if (empty($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (empty($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';

	switch (strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// _isproxy is read only after _connect(), exactly as before;
			// presumably _connect() is what sets it -- confirm.
			if ($this->_isproxy)
				$target = $URI;		// using proxy, send entire URI
			else
				$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");	// no proxy, send only the path

			$this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			if ($this->_isproxy)
				$target = $URI;		// using proxy, send entire URI
			else
				$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");	// no proxy, send only the path

			$this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// Expand the address only when it carries no scheme of its own. The old
		// test looked for the *request's* scheme, case-sensitively, so an
		// absolute URL on another scheme (http -> https) was treated as relative.
		if (!preg_match('|^[a-z][a-z0-9+.\-]*://|i', $this->_redirectaddr))
			$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

		// only follow redirect if it's on this site, or offsiteok is true
		$follow = $this->offsiteok;
		if (!$follow)
		{
			// Compare the parsed host instead of a "^http://host" prefix match:
			// the prefix also matched "host.evil.example" and never matched
			// an https:// redirect to the very same host.
			$redirect_parts = parse_url($this->_redirectaddr);
			$follow = is_array($redirect_parts)
				&& !empty($redirect_parts["scheme"])
				&& !empty($redirect_parts["host"])
				&& in_array(strtolower($redirect_parts["scheme"]), array("http", "https"))
				&& strcasecmp($redirect_parts["host"], $this->host) == 0;
		}

		if ($follow)
		{
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			if (strpos($this->_redirectaddr, "?") > 0)
				$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
			else
				$this->submit($this->_redirectaddr, $formvars, $formfiles);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls may refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
427
428 /*======================================================================*\
429         Function:       fetchlinks
430         Purpose:        fetch the links from a web page
431         Input:          $URI    where you are fetching from
432         Output:         $this->results  an array of the URLs
433 \*======================================================================*/
434
function fetchlinks($URI)
{
	/*
	 * Fetch $URI and replace $this->results with what _striplinks() returns
	 * for it (element by element when the results are an array). With
	 * $this->expandlinks set, the results are then passed through
	 * _expandlinks() using the last redirect address, if any, as the base
	 * instead of $URI.
	 * Returns false when fetch() fails, true otherwise.
	 */
	if (!$this->fetch($URI))
		return false;

	// a redirect that was followed supersedes the requested address
	$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

	if (!is_array($this->results))
	{
		$this->results = $this->_striplinks($this->results);
	}
	else
	{
		$idx = 0;
		while ($idx < count($this->results))
		{
			$this->results[$idx] = $this->_striplinks($this->results[$idx]);
			$idx++;
		}
	}

	if ($this->expandlinks)
		$this->results = $this->_expandlinks($this->results, $base);

	return true;
}
456
457 /*======================================================================*\
458         Function:       fetchform
459         Purpose:        fetch the form elements from a web page
460         Input:          $URI    where you are fetching from
461         Output:         $this->results  the resulting html form
462 \*======================================================================*/
463
function fetchform($URI)
{
	/*
	 * Fetch $URI and replace $this->results with what _stripform() returns
	 * for it; an array of results is processed element by element.
	 * Returns false when fetch() fails, true otherwise.
	 */
	if (!$this->fetch($URI))
		return false;

	if (!is_array($this->results))
	{
		$this->results = $this->_stripform($this->results);
		return true;
	}

	$idx = 0;
	while ($idx < count($this->results))
	{
		$this->results[$idx] = $this->_stripform($this->results[$idx]);
		$idx++;
	}

	return true;
}
483
484
485 /*======================================================================*\
486         Function:       fetchtext
487         Purpose:        fetch the text from a web page, stripping the links
488         Input:          $URI    where you are fetching from
489         Output:         $this->results  the text from the web page
490 \*======================================================================*/
491
function fetchtext($URI)
{
    // Fetch $URI and convert the response to plain text via _striptext().
    // Input:   $URI    where you are fetching from
    // Output:  true when fetch() succeeded, false otherwise; on success
    //          $this->results holds the stripped text (or an array of
    //          stripped texts when results was an array).

    $fetched = $this->fetch($URI);
    if (!$fetched)
        return false;

    if (is_array($this->results))
    {
        $x = 0;
        while ($x < count($this->results))
        {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            $x++;
        }
        return true;
    }

    $this->results = $this->_striptext($this->results);
    return true;
}
508
509 /*======================================================================*\
510         Function:       submitlinks
511         Purpose:        grab links from a form submission
512         Input:          $URI    where you are submitting from
513         Output:         $this->results  an array of the links from the post
514 \*======================================================================*/
515
function submitlinks($URI, $formvars="", $formfiles="")
{
    // Submit a form and keep only the links found in the response.
    // Input:   $URI        where you are submitting to
    //          $formvars   form variables handed on to submit()
    //          $formfiles  form files handed on to submit()
    // Output:  true when submit() succeeded, false otherwise; on success
    //          $this->results holds the links returned by _striplinks(),
    //          expanded with _expandlinks() when $this->expandlinks is set.

    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // after a redirect, relative links are resolved against the final address
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
        return true;
    }

    $x = 0;
    while ($x < count($this->results))
    {
        $this->results[$x] = $this->_striplinks($this->results[$x]);
        if ($this->expandlinks)
            $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        $x++;
    }
    return true;
}
542
543 /*======================================================================*\
544         Function:       submittext
545         Purpose:        grab text from a form submission
546         Input:          $URI    where you are submitting from
547         Output:         $this->results  the text from the web page
548 \*======================================================================*/
549
function submittext($URI, $formvars = "", $formfiles = "")
{
    // Submit a form and keep only the text of the response.
    // Input:   $URI        where you are submitting to
    //          $formvars   form variables handed on to submit()
    //          $formfiles  form files handed on to submit()
    // Output:  true when submit() succeeded, false otherwise; on success
    //          $this->results holds the output of _striptext(), passed
    //          through _expandlinks() when $this->expandlinks is set.

    $submitted = $this->submit($URI, $formvars, $formfiles);
    if (!$submitted)
        return false;

    // after a redirect, the final address becomes the base for expansion
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        $x = 0;
        while ($x < count($this->results))
        {
            $this->results[$x] = $this->_striptext($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            $x++;
        }
        return true;
    }

    $this->results = $this->_striptext($this->results);
    if ($this->expandlinks)
        $this->results = $this->_expandlinks($this->results, $URI);
    return true;
}
576
577
578
579 /*======================================================================*\
580         Function:       set_submit_multipart
581         Purpose:        Set the form submission content type to
582                                 multipart/form-data
583 \*======================================================================*/
function set_submit_multipart()
{
    // Select multipart/form-data as the content type for form submissions.
    $type = "multipart/form-data";
    $this->_submit_type = $type;
}
588
589
590 /*======================================================================*\
591         Function:       set_submit_normal
592         Purpose:        Set the form submission content type to
593                                 application/x-www-form-urlencoded
594 \*======================================================================*/
function set_submit_normal()
{
    // Select application/x-www-form-urlencoded as the content type for
    // form submissions.
    $type = "application/x-www-form-urlencoded";
    $this->_submit_type = $type;
}
599
600
601
602
603 /*======================================================================*\
604         Private functions
605 \*======================================================================*/
606
607
608 /*======================================================================*\
609         Function:       _striplinks
610         Purpose:        strip the hyperlinks from an html document
611         Input:          $document       document to strip.
612         Output:         $match          an array of the links
613 \*======================================================================*/
614
function _striplinks($document)
{
    // Collect the href value of every <a ...> tag in $document.
    // Input:   $document   html text to scan
    // Output:  array of link targets; all quoted href values come first,
    //          followed by all unquoted ones. An empty array is returned
    //          when the document has no links (previously an undefined
    //          variable, i.e. null plus a notice).

    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones. foreach replaces
    // the each() loops, which no longer exist in PHP 8.
    $match = array();
    foreach (array(2, 3) as $group)
    {
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
641
642 /*======================================================================*\
643         Function:       _stripform
644         Purpose:        strip the form elements from an html document
645         Input:          $document       document to strip.
646         Output:         $match          the matched form tags joined by CRLF (a string)
647 \*======================================================================*/
648
function _stripform($document)
{
    // Pull the form-related tags (FORM, INPUT, SELECT, TEXTAREA, OPTION)
    // out of an html document.
    // Input:   $document   html text to scan
    // Output:  string made of every full pattern match, joined with "\r\n"
    //          (an empty string when nothing matched).

    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    $found = array();
    preg_match_all($pattern, $document, $found);

    // index 0 carries the complete matches; glue them together
    return implode("\r\n", $found[0]);
}
659
660
661
662 /*======================================================================*\
663         Function:       _striptext
664         Purpose:        strip the text from an html document
665         Input:          $document       document to strip.
666         Output:         $text           the resulting text
667 \*======================================================================*/
668
function _striptext($document)
{
    // Reduce an html document to text: scripts and tags are removed, runs
    // of whitespace after a line break are collapsed, and a fixed list of
    // html entities is replaced by the corresponding character.
    // Input:   $document   html text (or array of texts) to strip
    // Output:  the stripped text, as returned by preg_replace()
    //
    // The two arrays below are positional: $search[i] is replaced by
    // $replace[i], in order.
    // NOTE(review): the chr() replacements are single bytes while the
    // umlaut literals follow this file's own encoding -- confirm which
    // output charset callers expect.

    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    "'&(quot|#34|#034|#x22);'i",        // replace html entities
                    "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
                    "'&(lt|#60|#060|#x3c);'i",
                    "'&(gt|#62|#062|#x3e);'i",
                    "'&(nbsp|#160|#xa0);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    "'&(reg|#174);'i",
                    "'&(deg|#176);'i",
                    "'&(#39|#039|#x27);'",
                    "'&(euro|#8364);'i",                // europe
                    "'&a(uml|UML);'",                   // german
                    "'&o(uml|UML);'",
                    "'&u(uml|UML);'",
                    "'&A(uml|UML);'",
                    "'&O(uml|UML);'",
                    "'&U(uml|UML);'",
                    "'&szlig;'i",
                    );
    $replace = array(   "",
                        "",
                        "\\1",
                        "\"",
                        "&",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169),
                        chr(174),
                        chr(176),
                        chr(39),
                        chr(128),
                        "ä",
                        "ö",
                        "ü",
                        "Ä",
                        "Ö",
                        "Ü",        // was the mis-encoded sequence "Ãœ"
                        "ß",
                    );

    $text = preg_replace($search,$replace,$document);

    return $text;
}
729
730 /*======================================================================*\
731         Function:       _expandlinks
732         Purpose:        expand each link into a fully qualified URL
733         Input:          $links                  the links to qualify
734                                 $URI                    the full URI to get the base from
735         Output:         $expandedLinks  the expanded links
736 \*======================================================================*/
737
function _expandlinks($links,$URI)
{
    // Expand each link into a fully qualified URL.
    // Input:   $links  a link or an array of links to qualify
    //          $URI    the full URI the links were found on; it provides
    //                  the base directory and the scheme://host root
    // Output:  the expanded link(s), in the same shape preg_replace()
    //          returns for $links
    //
    // Fix over the previous version: links that already start with
    // https:// are now recognised as absolute. Before, only http:// was,
    // so every https link (including root-relative links re-rooted on an
    // https page) had the base URL prepended a second time.

    // drop the query string ...
    preg_match("/^[^\?]+/",$URI,$match);

    // ... then a trailing "/name.ext" component and a trailing slash,
    // which leaves the base directory of the page
    $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
    $match = preg_replace("|/$|","",$match);
    $match_part = parse_url($match);
    $match_root =
    $match_part["scheme"]."://".$match_part["host"];

    // applied in order:
    //  1. strip "http://<our host>" so the link becomes root-relative
    //  2. root-relative links get scheme://host prepended
    //  3. anything that is still not http(s):// or mailto: is relative to
    //     the base directory
    //  4. collapse "/./"
    //  5. collapse "/dir/../"
    $search = array(    "|^http://".preg_quote($this->host, "|")."|i",
                        "|^(\/)|i",
                        "|^(?!https?://)(?!mailto:)|i",
                        "|/\./|",
                        "|/[^\/]+/\.\./|"
                    );

    $replace = array(   "",
                        $match_root."/",
                        $match."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
767
768 /*======================================================================*\
769         Function:       _httprequest
770         Purpose:        go get the http data from the server
771         Input:          $url            the url to fetch
772                                 $fp                     the current open file pointer
773                                 $URI            the full URI
774                                 $body           body contents to send if any (POST)
775         Output:
776 \*======================================================================*/
777
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Send one HTTP request over the already open stream $fp and read the
    // response into this object.
    // Input:   $url            request target for the request line ("/" when empty)
    //          $fp             the current open file pointer
    //          $URI            the full URI; supplies the scheme for relative
    //                          redirects and the base for meta-refresh links
    //          $http_method    method written on the request line
    //          $content_type   value for the Content-type header, if any
    //          $body           body contents to send if any (POST)
    // Output:  true once a response has been read; false, with
    //          $this->status set to -100, when a read timed out.
    //          Fills $this->headers, $this->status, $this->response_code,
    //          $this->_redirectaddr, $this->_frameurls and $this->results.
    //
    // Fixes over the previous version:
    //  - raw headers are iterated with foreach; the each() loop was never
    //    reset(), so raw headers were only sent on the first request made
    //    by an object (and each() no longer exists in PHP 8)
    //  - $this->headers is reset to an empty array instead of being unset,
    //    so it stays countable when the response carries no header lines
    //  - a Location:/URI: header without a space after the colon is parsed
    //    instead of reading an undefined match

    $cookie_headers = '';
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a Host entry in rawheaders takes precedence over the generated one
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line ends the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line separates the headers from the body
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            // the token after the protocol version is the status code
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of $this->maxlength until nothing more arrives
    $results = '';
    while(true)
    {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0)
            break;
        $results .= $_data;
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
931
932 /*======================================================================*\
933         Function:       _httpsrequest
934         Purpose:        go get the https data from the server using curl
935         Input:          $url            the url to fetch
936                                 $URI            the full URI
937                                 $body           body contents to send if any (POST)
938         Output:
939 \*======================================================================*/
940
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Fetch an https URI by running the external curl binary and read the
    // response into this object.
    // Input:   $url            the url to fetch (not used: curl is given $URI)
    //          $URI            the full URI handed to curl
    //          $http_method    not used: the request line is left to curl
    //          $content_type   value for the Content-type header, if any
    //          $body           body contents to send if any (passed with -d)
    // Output:  true when curl exited with 0; false, with $this->error set,
    //          when it did not or when no temporary file could be created.
    //          Fills $this->headers, $this->status, $this->response_code,
    //          $this->_redirectaddr, $this->_frameurls and $this->results.
    //
    // Fixes over the previous version:
    //  - every value placed on the command line (headers, body, header
    //    file, URI) goes through escapeshellarg(); before, the body was
    //    interpolated unescaped and the URI was run through escapeshellcmd()
    //    inside double quotes, which also corrupted query strings
    //  - $cmdline_params and $results are initialised; the temp directory
    //    no longer comes from an undefined local variable
    //  - the temporary header file is removed when curl fails
    //  - Location:/URI: headers are parsed case-insensitively with a single
    //    pattern (the old second pattern required two spaces after
    //    "Location:", so ordinary redirects were never picked up)
    //  - $this->status is set from the status line, as _httprequest does
    //  - raw headers are iterated with foreach instead of an un-reset each()
    //  - $this->headers is reset to an empty array instead of being unset

    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);

    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: Basic ".base64_encode($this->user.":".$this->pass);

    // build the curl argument list; each value is quoted as one shell argument
    $cmdline_params = "";
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // curl dumps the response headers into this file (-D)
    // NOTE(review): assumes the class exposes a temp_dir setting -- confirm;
    // an empty directory lets tempnam() fall back to the system default,
    // which is what the undefined variable used before amounted to.
    $header_dir = !empty($this->temp_dir) ? $this->temp_dir : "";
    $headerfile = tempnam($header_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    // NOTE(review): -k makes curl skip certificate verification; kept
    // because callers may rely on it, but it should be reconsidered.
    $results = array();
    exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() delivers the output line by line; put the document back together
    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    $this->headers = array();

    for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
    {
        $header_line = $result_headers[$currentHeader];

        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($header_line),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$header_line))
        {
            // the token after the protocol version is the status code
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
                $this->status = $status[1];
            $this->response_code = $header_line;
        }

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    unlink($headerfile);

    return true;
}
1079
1080 /*======================================================================*\
1081         Function:       setcookies()
1082         Purpose:        set cookies for a redirection
1083 \*======================================================================*/
1084
function setcookies()
{
    // Copy the cookies announced by the last response into $this->cookies
    // so that they are sent along with the next request.
    // Input:   none; reads $this->headers (raw response header lines)
    // Output:  none; adds name => url-decoded value to $this->cookies
    //
    // Fixes over the previous version:
    //  - returns quietly when no headers are stored, instead of calling
    //    count() on an unset property
    //  - the value stops at a line break as well as at ";", so the CRLF
    //    ending a stored header line no longer becomes part of the value

    if (empty($this->headers) || !is_array($this->headers))
        return;

    foreach ($this->headers as $header)
    {
        if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match))
            $this->cookies[$match[1]] = urldecode($match[2]);
    }
}
1093
1094
1095 /*======================================================================*\
1096         Function:       _check_timeout
1097         Purpose:        checks whether timeout has occurred
1098         Input:          $fp     file pointer
1099 \*======================================================================*/
1100
function _check_timeout($fp)
{
    // Report whether the last read on $fp timed out.
    // Input:   $fp     file pointer
    // Output:  true when a read timeout is configured and the stream
    //          status has "timed_out" set ($this->timed_out is then set
    //          to true as well); false otherwise.

    // without a configured read timeout there is nothing to check
    if (!($this->read_timeout > 0))
        return false;

    $stream_status = socket_get_status($fp);
    if (!$stream_status["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
1112
1113 /*======================================================================*\
1114         Function:       _connect
1115         Purpose:        make a socket connection
1116         Input:          $fp     file pointer
1117 \*======================================================================*/
1118
function _connect(&$fp)
{
    // Opens the socket for the request and stores it in $fp (by reference).
    //
    // When both proxy_host and proxy_port are set, the connection goes to
    // the proxy and the request is flagged as proxied; otherwise it goes to
    // $this->host:$this->port.
    //
    // Returns true on success. On failure returns false, with $this->status
    // set to the errno reported by fsockopen() and $this->error set to a
    // matching description.
    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen(
                $host,
                $port,
                $errno,
                $errstr,
                $this->_fp_timeout
                );

    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // Each case must end with break: without it every case falls
        // through to default and the specific message is overwritten.
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
            break;
    }
    return false;
}
1166 /*======================================================================*\
1167         Function:       _disconnect
1168         Purpose:        disconnect a socket connection
1169         Input:          $fp     file pointer
1170 \*======================================================================*/
1171
function _disconnect($fp)
{
    // Close the socket and hand back fclose()'s success flag unchanged.
    $closed = fclose($fp);

    return $closed;
}
1176
1177
1178 /*======================================================================*\
1179         Function:       _prepare_post_body
1180         Purpose:        Prepare post body according to encoding type
1181         Input:          $formvars  - form variables
1182                                 $formfiles - form upload files
1183         Output:         post body
1184 \*======================================================================*/
1185
function _prepare_post_body($formvars, $formfiles)
{
    // Builds the request body for a form submission.
    //
    // $formvars  - form fields (name => value, or name => list of values);
    //              a non-array argument is cast to an array.
    // $formfiles - upload fields (field name => file path or list of paths);
    //              only used for "multipart/form-data".
    //
    // The encoding is chosen by $this->_submit_type. For multipart bodies a
    // fresh boundary is generated and stored in $this->_mime_boundary.
    //
    // Returns the encoded body as a string; an empty string when there is
    // nothing to send or the submit type is not one of the two handled here.
    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach replaces the each() loops: each() no longer exists
            // in PHP 8, and foreach always starts at the first element.
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // "{$key}[]" yields name="key[]"; the former
                        // "$key\[\]" emitted the backslashes literally.
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents() reads the file binary-safely and
                    // copes with empty files, where fread() with a zero
                    // length fails. Skip anything that cannot be read.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;

                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary terminates the multipart body
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
1248 }
1249 endif;
1250 ?>