1 | <?php |
2 | |
3 | /** |
4 | * Deprecated. Use WP_HTTP (http.php, class-http.php) instead. |
5 | */ |
6 | _deprecated_file( basename( __FILE__ ), '3.0', WPINC . '/http.php' ); |
7 | |
8 | if ( !class_exists( 'Snoopy' ) ) : |
9 | /************************************************* |
10 | |
11 | Snoopy - the PHP net client |
12 | Author: Monte Ohrt <monte@ispi.net> |
13 | Copyright (c): 1999-2008 New Digital Group, all rights reserved |
14 | Version: 1.2.4 |
15 | |
16 | * This library is free software; you can redistribute it and/or |
17 | * modify it under the terms of the GNU Lesser General Public |
18 | * License as published by the Free Software Foundation; either |
19 | * version 2.1 of the License, or (at your option) any later version. |
20 | * |
21 | * This library is distributed in the hope that it will be useful, |
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | * Lesser General Public License for more details. |
25 | * |
26 | * You should have received a copy of the GNU Lesser General Public |
27 | * License along with this library; if not, write to the Free Software |
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
29 | |
30 | You may contact the author of Snoopy by e-mail at: |
31 | monte@ohrt.com |
32 | |
33 | The latest version of Snoopy can be obtained from: |
34 | http://snoopy.sourceforge.net/ |
35 | |
36 | *************************************************/ |
37 | |
38 | class Snoopy |
39 | { |
/**** Public variables ****/

/* user definable vars */

// Host name we are connecting to.
var $host = "www.php.net";
// Port we are connecting to.
var $port = 80;
// Proxy host to use.
var $proxy_host = "";
// Proxy port to use.
var $proxy_port = "";
// Proxy user to use.
var $proxy_user = "";
// Proxy password to use.
var $proxy_pass = "";

// Agent we masquerade as.
var $agent = "Snoopy v1.2.4";
// Referer info to pass.
var $referer = "";
// Array of cookies to pass, e.g.
//   $cookies["username"]="joe";
var $cookies = array();
// Array of raw headers to send, e.g.
//   $rawheaders["Content-type"]="text/html";
var $rawheaders = array();

// Http redirection depth maximum. 0 = disallow.
var $maxredirs = 5;
// Contains address of last redirected address.
var $lastredirectaddr = "";
// Allows redirection off-site.
var $offsiteok = true;
// Frame content depth maximum. 0 = disallow.
var $maxframes = 0;
// Expand links to fully qualified URLs. This only applies to
// fetchlinks(), submitlinks(), and submittext().
var $expandlinks = true;
// Pass set cookies back through redirects.
// NOTE: this currently does not respect dates, domains or paths.
var $passcookies = true;

// User for http authentication.
var $user = "";
// Password for http authentication.
var $pass = "";

// Http accept types.
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

// Where the content is put.
var $results = "";

// Error messages sent here.
var $error = "";
// Response code returned from server.
var $response_code = "";
// Headers returned from server sent here.
var $headers = array();
// Max return data length (body).
var $maxlength = 500000;
// Timeout on read operations, in seconds.
// Supported only since PHP 4 Beta 4; set to 0 to disallow timeouts.
var $read_timeout = 0;
// If a read operation timed out.
var $timed_out = false;
// Http request status.
var $status = 0;

// Temporary directory that the webserver has permission to write to.
// Under Windows, this should be C:\temp
var $temp_dir = "/tmp";

// Snoopy will use cURL for fetching SSL content if a full system path to
// the cURL binary is supplied here. Set to false if you do not have cURL
// installed. See http://curl.haxx.se for details on installing cURL.
// Snoopy does *not* use the cURL library functions built into php, as
// these functions are not stable as of this Snoopy release.
var $curl_path = "/usr/local/bin/curl";

/**** Private variables ****/

// Max line length (headers).
var $_maxlinelen = 4096;

// Default http request method.
var $_httpmethod = "GET";
// Default http request version.
var $_httpversion = "HTTP/1.0";
// Default submit method.
var $_submit_method = "POST";
// Default submit type.
var $_submit_type = "application/x-www-form-urlencoded";
// MIME boundary for multipart/form-data submit type.
var $_mime_boundary = "";
// Will be set if page fetched is a redirect.
var $_redirectaddr = false;
// Increments on an http redirect.
var $_redirectdepth = 0;
// Frame src urls.
var $_frameurls = array();
// Increments on frame depth.
var $_framedepth = 0;

// Set if using a proxy server.
var $_isproxy = false;
// Timeout for socket connection.
var $_fp_timeout = 30;
119 | |
120 | /*======================================================================*\ |
121 | Function: fetch |
122 | Purpose: fetch the contents of a web page |
123 | (and possibly other protocols in the |
124 | future like ftp, nntp, gopher, etc.) |
125 | Input: $URI the location of the page to fetch |
126 | Output: $this->results the output text from the fetch |
127 | \*======================================================================*/ |
128 | |
/**
 * Fetch a page over http or https, then follow a pending redirect and any
 * queued frame URLs.
 *
 * Credentials embedded in the URI are copied to $this->user / $this->pass,
 * and $this->host (plus $this->port when the URI names one) is updated
 * before the request is issued. The fetched output is left in
 * $this->results by the request helpers.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the protocol is not http/https (an error message
 *              is stored in $this->error), when the http connection cannot
 *              be opened, or when https is requested without an executable
 *              cURL binary in $this->curl_path; true otherwise
 */
function fetch($URI)
{
	$URI_PARTS = parse_url($URI);

	// parse_url() returns false for seriously malformed URLs and leaves out
	// keys that are absent from the URL; normalise so that such input falls
	// through to the "invalid protocol" branch without notices
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (!isset($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (!isset($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';

	// path + query, used when no proxy is involved
	$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

	switch (strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// using proxy: send entire URI; no proxy: send only the path
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			// using proxy: send entire URI; no proxy: send only the path
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// "On this site" means the redirect's host equals the current host.
		// Comparing parsed hosts (instead of a "^http://host" prefix match)
		// rejects look-alikes such as host.evil.tld and accepts https.
		$redirect_parts = parse_url($this->_redirectaddr);
		$onsite = is_array($redirect_parts)
			&& isset($redirect_parts["host"])
			&& strcasecmp($redirect_parts["host"], (string) $this->host) == 0;

		// only follow redirect if it's on this site, or offsiteok is true
		if ($onsite || $this->offsiteok)
		{
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch($this->_redirectaddr);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		// foreach instead of each(), which no longer exists in PHP 8
		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
267 | |
268 | /*======================================================================*\ |
269 | Function: submit |
270 | Purpose: submit an http form |
271 | Input: $URI the location to post the data |
272 | $formvars the formvars to use. |
273 | format: $formvars["var"] = "val"; |
274 | $formfiles an array of files to submit |
275 | format: $formfiles["var"] = "/dir/filename.ext"; |
276 | Output: $this->results the text output from the post |
277 | \*======================================================================*/ |
278 | |
/**
 * Submit an http form over http or https, then follow a pending redirect
 * and any queued frame URLs.
 *
 * The post body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. A redirect whose address
 * contains a "?" is followed with fetch(); any other redirect is
 * re-submitted with the same form data.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the formvars to use, format: $formvars["var"] = "val";
 * @param mixed  $formfiles an array of files to submit,
 *                          format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the protocol is not http/https (an error message
 *              is stored in $this->error), when the http connection cannot
 *              be opened, or when https is requested without an executable
 *              cURL binary in $this->curl_path; true otherwise
 */
function submit($URI, $formvars="", $formfiles="")
{
	$postdata = $this->_prepare_post_body($formvars, $formfiles);

	$URI_PARTS = parse_url($URI);

	// parse_url() returns false for seriously malformed URLs and leaves out
	// keys that are absent from the URL; normalise so that such input falls
	// through to the "invalid protocol" branch without notices
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (!isset($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (!isset($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';

	// path + query, used when no proxy is involved
	$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

	switch (strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// using proxy: send entire URI; no proxy: send only the path
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			// using proxy: send entire URI; no proxy: send only the path
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// qualify a redirect that does not start with the request's scheme
		if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
			$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

		// "On this site" means the redirect's host equals the current host.
		// Comparing parsed hosts (instead of a "^http://host" prefix match)
		// rejects look-alikes such as host.evil.tld and accepts https.
		$redirect_parts = parse_url($this->_redirectaddr);
		$onsite = is_array($redirect_parts)
			&& isset($redirect_parts["host"])
			&& strcasecmp($redirect_parts["host"], (string) $this->host) == 0;

		// only follow redirect if it's on this site, or offsiteok is true
		if ($onsite || $this->offsiteok)
		{
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			if (strpos($this->_redirectaddr, "?") > 0)
				$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
			else
				$this->submit($this->_redirectaddr, $formvars, $formfiles);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		// foreach instead of each(), which no longer exists in PHP 8
		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
433 | |
434 | /*======================================================================*\ |
435 | Function: fetchlinks |
436 | Purpose: fetch the links from a web page |
437 | Input: $URI where you are fetching from |
438 | Output: $this->results an array of the URLs |
439 | \*======================================================================*/ |
440 | |
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When the fetch produced framed content ($this->results is an array with
 * one document per entry) every entry is converted to its own list of
 * links. Links are expanded against the last redirect address when one was
 * followed, otherwise against $URI, and only if $this->expandlinks is set.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of fetch(); on false $this->results is untouched
 */
function fetchlinks($URI)
{
	if (!$this->fetch($URI))
		return false;

	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	if (is_array($this->results))
	{
		// Expand per document: each entry becomes an array of links, and
		// handing the resulting array of arrays to _expandlinks() in one
		// go would feed nested arrays to preg_replace().
		$total = count($this->results);
		for ($x = 0; $x < $total; $x++)
		{
			$this->results[$x] = $this->_striplinks($this->results[$x]);
			if ($this->expandlinks)
				$this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
		}
	}
	else
	{
		$this->results = $this->_striplinks($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
	}

	return true;
}
462 | |
463 | /*======================================================================*\ |
464 | Function: fetchform |
465 | Purpose: fetch the form elements from a web page |
466 | Input: $URI where you are fetching from |
467 | Output: $this->results the resulting html form |
468 | \*======================================================================*/ |
469 | |
/**
 * Fetch a page and reduce $this->results to its form elements.
 *
 * Framed content (an array in $this->results) is reduced entry by entry.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of fetch(); on false $this->results is untouched
 */
function fetchform($URI)
{
	if (!$this->fetch($URI))
		return false;

	// single document
	if (!is_array($this->results))
	{
		$this->results = $this->_stripform($this->results);
		return true;
	}

	// framed content: one document per entry
	$x = 0;
	while ($x < count($this->results))
	{
		$this->results[$x] = $this->_stripform($this->results[$x]);
		$x++;
	}

	return true;
}
489 | |
490 | |
491 | /*======================================================================*\ |
492 | Function: fetchtext |
493 | Purpose: fetch the text from a web page, stripping the links |
494 | Input: $URI where you are fetching from |
495 | Output: $this->results the text from the web page |
496 | \*======================================================================*/ |
497 | |
/**
 * Fetch a page and reduce $this->results to its text.
 *
 * Framed content (an array in $this->results) is reduced entry by entry.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of fetch(); on false $this->results is untouched
 */
function fetchtext($URI)
{
	if (!$this->fetch($URI))
		return false;

	// single document
	if (!is_array($this->results))
	{
		$this->results = $this->_striptext($this->results);
		return true;
	}

	// framed content: one document per entry
	$x = 0;
	while ($x < count($this->results))
	{
		$this->results[$x] = $this->_striptext($this->results[$x]);
		$x++;
	}

	return true;
}
514 | |
515 | /*======================================================================*\ |
516 | Function: submitlinks |
517 | Purpose: grab links from a form submission |
518 | Input: $URI where you are submitting from |
519 | Output: $this->results an array of the links from the post |
520 | \*======================================================================*/ |
521 | |
/**
 * Submit a form and replace $this->results with the links of the response.
 *
 * Links are expanded (when $this->expandlinks is set) against the last
 * redirect address if one was followed, otherwise against $URI.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of submit(); on false $this->results is untouched
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	// single document
	if (!is_array($this->results))
	{
		$found = $this->_striplinks($this->results);
		$this->results = $this->expandlinks ? $this->_expandlinks($found, $URI) : $found;
		return true;
	}

	// framed content: one document per entry
	for ($x = 0; $x < count($this->results); $x++)
	{
		$found = $this->_striplinks($this->results[$x]);
		$this->results[$x] = $this->expandlinks ? $this->_expandlinks($found, $URI) : $found;
	}

	return true;
}
548 | |
549 | /*======================================================================*\ |
550 | Function: submittext |
551 | Purpose: grab text from a form submission |
552 | Input: $URI where you are submitting from |
553 | Output: $this->results the text from the web page |
554 | \*======================================================================*/ |
555 | |
/**
 * Submit a form and reduce $this->results to the text of the response.
 *
 * When $this->expandlinks is set the stripped text is additionally passed
 * through _expandlinks(), using the last redirect address if one was
 * followed, otherwise $URI.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of submit(); on false $this->results is untouched
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	// single document
	if (!is_array($this->results))
	{
		$plain = $this->_striptext($this->results);
		$this->results = $this->expandlinks ? $this->_expandlinks($plain, $URI) : $plain;
		return true;
	}

	// framed content: one document per entry
	for ($x = 0; $x < count($this->results); $x++)
	{
		$plain = $this->_striptext($this->results[$x]);
		$this->results[$x] = $this->expandlinks ? $this->_expandlinks($plain, $URI) : $plain;
	}

	return true;
}
582 | |
583 | |
584 | |
585 | /*======================================================================*\ |
586 | Function: set_submit_multipart |
587 | Purpose: Set the form submission content type to |
588 | multipart/form-data |
589 | \*======================================================================*/ |
/**
 * Switch the form submission content type to multipart/form-data.
 */
function set_submit_multipart()
{
	$multipart_type = "multipart/form-data";

	$this->_submit_type = $multipart_type;
}
594 | |
595 | |
596 | /*======================================================================*\ |
597 | Function: set_submit_normal |
598 | Purpose: Set the form submission content type to |
599 | application/x-www-form-urlencoded |
600 | \*======================================================================*/ |
/**
 * Switch the form submission content type back to
 * application/x-www-form-urlencoded.
 */
function set_submit_normal()
{
	$urlencoded_type = "application/x-www-form-urlencoded";

	$this->_submit_type = $urlencoded_type;
}
605 | |
606 | |
607 | |
608 | |
609 | /*======================================================================*\ |
610 | Private functions |
611 | \*======================================================================*/ |
612 | |
613 | |
614 | /*======================================================================*\ |
615 | Function: _striplinks |
616 | Purpose: strip the hyperlinks from an html document |
617 | Input: $document document to strip. |
618 | Output: $match an array of the links |
619 | \*======================================================================*/ |
620 | |
/**
 * Extract the href targets of the anchors in an html document.
 *
 * Quoted href values are listed first, followed by the unquoted ones; empty
 * values are skipped.
 *
 * @param string $document document to strip
 * @return array the links; an empty array when the document has none
 */
function _striplinks($document)
{
	preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href=
		([\"\'])? # find single or double quote
		(?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching
		# quote, otherwise match up to next space
		'isx",$document,$links);

	// always return an array, also when nothing matched
	$match = array();

	// catenate the non-empty matches from the conditional subpattern:
	// group 2 holds quoted values, group 3 unquoted ones
	// (foreach instead of each(), which no longer exists in PHP 8)
	foreach (array(2, 3) as $group)
	{
		if (!isset($links[$group]) || !is_array($links[$group]))
			continue;

		foreach ($links[$group] as $val)
		{
			if (!empty($val))
				$match[] = $val;
		}
	}

	// return the links
	return $match;
}
647 | |
648 | /*======================================================================*\ |
649 | Function: _stripform |
650 | Purpose: strip the form elements from an html document |
651 | Input: $document document to strip. |
652 | Output: $match an array of the links |
653 | \*======================================================================*/ |
654 | |
/**
 * Reduce an html document to its form elements.
 *
 * @param string $document document to strip
 * @return string the matched form/input/select/textarea/option elements,
 *                joined with "\r\n"
 */
function _stripform($document)
{
	$form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

	preg_match_all($form_pattern, $document, $elements);

	// index 0 holds the full text of every match
	$whole_matches = $elements[0];

	return implode("\r\n", $whole_matches);
}
665 | |
666 | |
667 | |
668 | /*======================================================================*\ |
669 | Function: _striptext |
670 | Purpose: strip the text from an html document |
671 | Input: $document document to strip. |
672 | Output: $text the resulting text |
673 | \*======================================================================*/ |
674 | |
/**
 * Reduce an html document to plain text.
 *
 * Removes script blocks and tags, collapses whitespace that follows a line
 * break, and decodes a fixed list of entities. Decoded non-ASCII characters
 * are emitted as single bytes via chr().
 *
 * @param string|array $document document to strip
 * @return string|array the resulting text
 */
function _striptext($document)
{
	// I didn't use preg eval (//e) since that is only available in PHP 4.0.
	// so, list your entities one by one here. I included some of the
	// more common ones.

	// pattern => replacement, applied in this order
	$search = array(	"'<script[^>]*?>.*?</script>'si",	// strip out javascript
						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
						"'([\r\n])[\s]+'",					// strip out white space
						"'&(quot|#34|#034|#x22);'i",		// replace html entities
						"'&(amp|#38|#038|#x26);'i",			// added hexadecimal values
						"'&(lt|#60|#060|#x3c);'i",
						"'&(gt|#62|#062|#x3e);'i",
						"'&(nbsp|#160|#xa0);'i",
						"'&(iexcl|#161);'i",
						"'&(cent|#162);'i",
						"'&(pound|#163);'i",
						"'&(copy|#169);'i",
						"'&(reg|#174);'i",
						"'&(deg|#176);'i",
						"'&(#39|#039|#x27);'",
						"'&(euro|#8364);'i",				// europe
						"'&a(uml|UML);'",					// german
						"'&o(uml|UML);'",
						"'&u(uml|UML);'",
						"'&A(uml|UML);'",
						"'&O(uml|UML);'",
						"'&U(uml|UML);'",
						"'&szlig;'i",
					);
	$replace = array(	"",
						"",
						"\\1",
						"\"",
						"&",
						"<",
						">",
						" ",
						chr(161),
						chr(162),
						chr(163),
						chr(169),
						chr(174),
						chr(176),
						chr(39),
						chr(128),
						// the german entities decode to their single-byte
						// characters, like the chr() entries above, instead
						// of being dropped
						chr(228),	// &auml;
						chr(246),	// &ouml;
						chr(252),	// &uuml;
						chr(196),	// &Auml;
						chr(214),	// &Ouml;
						chr(220),	// &Uuml;
						chr(223),	// &szlig;
					);

	$text = preg_replace($search,$replace,$document);

	return $text;
}
735 | |
736 | /*======================================================================*\ |
737 | Function: _expandlinks |
738 | Purpose: expand each link into a fully qualified URL |
739 | Input: $links the links to qualify |
740 | $URI the full URI to get the base from |
741 | Output: $expandedLinks the expanded links |
742 | \*======================================================================*/ |
743 | |
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is $URI without its query string, without a trailing
 * "/name.ext" path segment and without a trailing slash. Links that start
 * with "/" are resolved against scheme://host of the base; links that do
 * not start with http://, https:// or mailto: are resolved against the
 * base itself. "/./" and "/segment/../" sequences are then collapsed.
 *
 * @param string|array $links the links to qualify
 * @param string       $URI   the full URI to get the base from
 * @return string|array the expanded links
 */
function _expandlinks($links,$URI)
{
	// drop the query string
	$base = preg_match("/^[^\?]+/", $URI, $found) ? $found[0] : '';

	// Keep "scheme://authority" out of reach of the filename stripping
	// below; otherwise a path-less URI such as http://example.com has its
	// host mistaken for a "name.ext" segment and removed.
	$authority = '';
	if (preg_match("|^([a-z][a-z0-9+.\-]*://[^/]*)(.*)$|is", $base, $split))
	{
		$authority = $split[1];
		$base = $split[2];
	}
	$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$base);
	$base = preg_replace("|/$|","",$base);
	$base = $authority.$base;

	// parse_url() may return false or omit keys
	$base_part = parse_url($base);
	if (!is_array($base_part))
		$base_part = array();
	$match_root =
		(isset($base_part["scheme"]) ? $base_part["scheme"] : '')."://".
		(isset($base_part["host"]) ? $base_part["host"] : '');

	$search = array(	"|^http://".preg_quote($this->host, "|")."|i",
						"|^(\/)|i",
						// https links are absolute too and must not get
						// the base prepended
						"|^(?!https?://)(?!mailto:)|i",
						"|/\./|",
						"|/[^\/]+/\.\./|"
					);

	$replace = array(	"",
						$match_root."/",
						$base."/",
						"/",
						"/"
					);

	$expandedLinks = preg_replace($search,$replace,$links);

	return $expandedLinks;
}
773 | |
774 | /*======================================================================*\ |
775 | Function: _httprequest |
776 | Purpose: go get the http data from the server |
777 | Input: $url the url to fetch |
778 | $fp the current open file pointer |
779 | $URI the full URI |
780 | $body body contents to send if any (POST) |
781 | Output: |
782 | \*======================================================================*/ |
783 | |
/**
 * Send an http request over an open connection and read the response.
 *
 * Builds the request head from the object's settings (agent, host, accept,
 * referer, cookies, raw headers, credentials, proxy credentials), writes
 * head and body to $fp, then reads the response. Response header lines are
 * collected in $this->headers, the status in $this->status and
 * $this->response_code, and a Location:/URI: header or a meta refresh tag
 * sets $this->_redirectaddr. The body is stored in $this->results
 * (appended as a new entry when framed content is being collected) and
 * frame sources are queued in $this->_frameurls.
 *
 * @param string   $url          the url to fetch (request target)
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  the request method to send
 * @param string   $content_type Content-type to send, if any
 * @param string   $body         body contents to send if any (POST)
 * @return bool false (with $this->status set to -100) when a read timed
 *              out, true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
	$cookie_headers = '';
	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	// parse_url() may return false or omit the scheme
	$URI_PARTS = parse_url($URI);
	$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : '';

	if(empty($url))
		$url = "/";
	$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
	if(!empty($this->agent))
		$headers .= "User-Agent: ".$this->agent."\r\n";
	if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
		$headers .= "Host: ".$this->host;
		if(!empty($this->port) && $this->port != 80)
			$headers .= ":".$this->port;
		$headers .= "\r\n";
	}
	if(!empty($this->accept))
		$headers .= "Accept: ".$this->accept."\r\n";
	if(!empty($this->referer))
		$headers .= "Referer: ".$this->referer."\r\n";
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		if ( count($this->cookies) > 0 ) {
			$cookie_headers .= 'Cookie: ';
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
			}
			// drop the trailing "; "
			$headers .= substr($cookie_headers,0,-2) . "\r\n";
		}
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach instead of each(): each() no longer exists in PHP 8 and,
		// lacking a reset(), skipped the headers on every later request
		foreach ($this->rawheaders as $headerKey => $headerVal)
			$headers .= $headerKey.": ".$headerVal."\r\n";
	}
	if(!empty($content_type)) {
		$headers .= "Content-type: $content_type";
		if ($content_type == "multipart/form-data")
			$headers .= "; boundary=".$this->_mime_boundary;
		$headers .= "\r\n";
	}
	if(!empty($body))
		$headers .= "Content-length: ".strlen($body)."\r\n";
	if(!empty($this->user) || !empty($this->pass))
		$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

	//add proxy auth headers
	if(!empty($this->proxy_user))
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

	// blank line terminates the request head
	$headers .= "\r\n";

	// set the read timeout if needed
	if ($this->read_timeout > 0)
		socket_set_timeout($fp, $this->read_timeout);
	$this->timed_out = false;

	fwrite($fp,$headers.$body,strlen($headers.$body));

	$this->_redirectaddr = false;
	// start from an empty list instead of unsetting the property, so it
	// stays defined when the response carries no header lines
	$this->headers = array();

	while($currentHeader = fgets($fp,$this->_maxlinelen))
	{
		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// blank line: end of the response head
		if($currentHeader == "\r\n")
			break;

		// if a header begins with Location: or URI:, set the redirect.
		// The whitespace after the colon is optional, so a header written
		// without it still yields its URL.
		if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
		{
			// URL portion of the redirect
			$location = $matches[2];

			// look for :// in the Location header to see if hostname is included
			if(!preg_match("|\:\/\/|",$location))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$location))
					$this->_redirectaddr .= "/".$location;
				else
					$this->_redirectaddr .= $location;
			}
			else
				$this->_redirectaddr = $location;
		}

		if(preg_match("|^HTTP/|",$currentHeader))
		{
			if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
			{
				$this->status= $status[1];
			}
			$this->response_code = $currentHeader;
		}

		$this->headers[] = $currentHeader;
	}

	// read the body in chunks of $this->maxlength until nothing is left
	$results = '';
	do {
		$_data = fread($fp, $this->maxlength);
		if ($_data === false || strlen($_data) == 0) {
			break;
		}
		$results .= $_data;
	} while(true);

	if ($this->read_timeout > 0 && $this->_check_timeout($fp))
	{
		$this->status=-100;
		return false;
	}

	// check if there is a a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		// $this->results starts out as a string; appending with [] to a
		// string is a fatal error on PHP >= 7.1, so switch to an array first
		if(!is_array($this->results))
			$this->results = array();
		$this->results[] = $results;
		for($x=0; $x<count($match[1]); $x++)
			$this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
937 | |
938 | /*======================================================================*\ |
939 | Function: _httpsrequest |
940 | Purpose: go get the https data from the server using curl |
941 | Input: $url the url to fetch |
942 | $URI the full URI |
943 | $body body contents to send if any (POST) |
944 | Output: |
945 | \*======================================================================*/ |
946 | |
947 | function _httpsrequest($url,$URI,$http_method,$content_type="",$body="") |
948 | { |
949 | if($this->passcookies && $this->_redirectaddr) |
950 | $this->setcookies(); |
951 | |
952 | $headers = array(); |
953 | |
954 | $URI_PARTS = parse_url($URI); |
955 | if(empty($url)) |
956 | $url = "/"; |
957 | // GET ... header not needed for curl |
958 | //$headers[] = $http_method." ".$url." ".$this->_httpversion; |
959 | if(!empty($this->agent)) |
960 | $headers[] = "User-Agent: ".$this->agent; |
961 | if(!empty($this->host)) |
962 | if(!empty($this->port)) |
963 | $headers[] = "Host: ".$this->host.":".$this->port; |
964 | else |
965 | $headers[] = "Host: ".$this->host; |
966 | if(!empty($this->accept)) |
967 | $headers[] = "Accept: ".$this->accept; |
968 | if(!empty($this->referer)) |
969 | $headers[] = "Referer: ".$this->referer; |
970 | if(!empty($this->cookies)) |
971 | { |
972 | if(!is_array($this->cookies)) |
973 | $this->cookies = (array)$this->cookies; |
974 | |
975 | reset($this->cookies); |
976 | if ( count($this->cookies) > 0 ) { |
977 | $cookie_str = 'Cookie: '; |
978 | foreach ( $this->cookies as $cookieKey => $cookieVal ) { |
979 | $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; "; |
980 | } |
981 | $headers[] = substr($cookie_str,0,-2); |
982 | } |
983 | } |
984 | if(!empty($this->rawheaders)) |
985 | { |
986 | if(!is_array($this->rawheaders)) |
987 | $this->rawheaders = (array)$this->rawheaders; |
988 | while(list($headerKey,$headerVal) = each($this->rawheaders)) |
989 | $headers[] = $headerKey.": ".$headerVal; |
990 | } |
991 | if(!empty($content_type)) { |
992 | if ($content_type == "multipart/form-data") |
993 | $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary; |
994 | else |
995 | $headers[] = "Content-type: $content_type"; |
996 | } |
997 | if(!empty($body)) |
998 | $headers[] = "Content-length: ".strlen($body); |
999 | if(!empty($this->user) || !empty($this->pass)) |
1000 | $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass); |