|
Snoopy.class.php 是一个用于 HTTP 协议访问操作的类库,主要在 MagpieRSS 中用来抓取远程文件。我之前转载的一篇文章对它做过简单的介绍;今天闲来无事,把代码贴出来,供大家参考学习。
<?php
/*************************************************
Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
You may contact the author of Snoopy by e-mail at:
monte@ispi.net
Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510
The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/
*************************************************/
classSnoopy
{
/**** Public variables ****/
/* user definable vars */
var$host="www.php.net"; // host name we are connecting to
var$port=80; // port we are connecting to
var$proxy_host=""; // proxy host to use
var$proxy_port=""; // proxy port to use
var$proxy_user=""; // proxy user to use
var$proxy_pass=""; // proxy password to use
// NOTE(review): the agent literal below looks like it lost a space
// ("Snoopy v1.2.3") when this listing was pasted -- confirm against upstream.
var$agent="Snoopyv1.2.3"; // agent we masquerade as
var$referer=""; // referer info to pass
var$cookies=array(); // array of cookies to pass
// $cookies["username"]="joe";
var$rawheaders=array(); // array of raw headers to send
// $rawheaders["Content-type"]="text/html";
var$maxredirs=5; // http redirection depth maximum. 0 = disallow
var$lastredirectaddr=""; // contains address of last redirected address
var$offsiteok=true; // allows redirection off-site
var$maxframes=0; // frame content depth maximum. 0 = disallow
var$expandlinks=true; // expand links to fully qualified URLs.
// this only applies to fetchlinks()
// submitlinks(), and submittext()
var$passcookies=true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.
var$user=""; // user for http authentication
var$pass=""; // password for http authentication
// http accept types
var$accept="image/gif,image/x-xbitmap,image/jpeg,image/pjpeg,*/*";
var$results=""; // where the content is put
var$error=""; // error messages sent here
var$response_code=""; // response code returned from server
var$headers=array(); // headers returned from server sent here
var$maxlength=500000; // max return data length (body)
var$read_timeout=0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
var$timed_out=false; // if a read operation timed out
var$status=0; // http request status
var$temp_dir="/tmp"; // temporary directory that the webserver
// has permission to write to.
// under Windows, this should be C:\temp
var$curl_path="/usr/local/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.
/**** Private variables ****/
var$_maxlinelen=4096; // max line length (headers)
var$_httpmethod="GET"; // default http request method
var$_httpversion="HTTP/1.0"; // default http request version
var$_submit_method="POST"; // default submit method
var$_submit_type="application/x-www-form-urlencoded"; // default submit type
var$_mime_boundary=""; // MIME boundary for multipart/form-data submit type
var$_redirectaddr=false; // will be set if page fetched is a redirect
var$_redirectdepth=0; // increments on an http redirect
var$_frameurls=array(); // frame src urls
var$_framedepth=0; // increments on frame depth
var$_isproxy=false; // set if using a proxy server
var$_fp_timeout=30; // timeout for socket connection
/*======================================================================*
	Function:	fetch
	Purpose:	fetch the contents of a web page
				(and possibly other protocols in the
				future like ftp, nntp, gopher, etc.)
	Input:		$URI	the location of the page to fetch
	Output:		$this->results	the output text from the fetch
*======================================================================*/
// Returns true once a request has been issued for $URI (including any
// redirect / frame follow-ups), and false when the URI is unusable, the
// scheme is unsupported, the socket connection fails, or (for https) no
// executable cURL binary is configured. $this->error is set for an
// unsupported scheme or a missing host.
function fetch($URI)
{
	$URI_PARTS = parse_url($URI);
	// parse_url() returns false for seriously malformed URLs; normalise to an
	// empty array so the lookups below never index a non-array.
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();

	// Credentials embedded in the URI override the configured ones.
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];

	$scheme = empty($URI_PARTS["scheme"]) ? '' : $URI_PARTS["scheme"];

	// Request target used when talking to the origin server directly.
	$path = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];
	if (!empty($URI_PARTS["query"]))
		$path .= "?" . $URI_PARTS["query"];

	switch (strtolower($scheme))
	{
		case "http":
		case "https":
			break;
		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "' . $scheme . '" ';
			return false;
	}

	// A scheme without a host (e.g. "http:") cannot be requested.
	if (!isset($URI_PARTS["host"]) || $URI_PARTS["host"] === '')
	{
		$this->error = 'Invalid URI "' . $URI . '": no host given';
		return false;
	}

	// A proxy needs the absolute URI; an origin server only the path.
	$target = $this->_isproxy ? $URI : $path;

	if (strtolower($scheme) == "http")
	{
		$this->host = $URI_PARTS["host"];
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		if (!$this->_connect($fp))
			return false;

		$this->_httprequest($target, $fp, $URI, $this->_httpmethod);
		$this->_disconnect($fp);
	}
	else
	{
		// https is delegated to an external cURL binary.
		if (!$this->curl_path)
			return false;
		if (function_exists("is_executable") && !is_executable($this->curl_path))
			return false;

		$this->host = $URI_PARTS["host"];
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		$this->_httpsrequest($target, $URI, $this->_httpmethod);
	}

	$this->_follow_redirect();
	$this->_fetch_frames();

	return true;
}

/*======================================================================*
	Function:	_follow_redirect
	Purpose:	follow $this->_redirectaddr if a redirect was recorded,
				the depth limit ($this->maxredirs) is not yet reached,
				and the target is on the current host or
				$this->offsiteok is true
*======================================================================*/
function _follow_redirect()
{
	if (!$this->_redirectaddr)
		return;

	// check if we've hit the max depth
	if ($this->maxredirs <= $this->_redirectdepth)
		return;

	// Same-site means http or https on exactly this host: the host must be
	// followed by a URL delimiter or the end of the string, so that
	// "host.evil.org" is not mistaken for "host".
	$samesite = preg_match(
		'~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i',
		$this->_redirectaddr
	);

	if ($samesite || $this->offsiteok)
	{
		$this->_redirectdepth++;
		$this->lastredirectaddr = $this->_redirectaddr;
		$this->fetch($this->_redirectaddr);
	}
}

/*======================================================================*
	Function:	_fetch_frames
	Purpose:	fetch the frame urls collected in $this->_frameurls
				while $this->_framedepth is below $this->maxframes
*======================================================================*/
function _fetch_frames()
{
	if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
		return;

	// Work on a copy and clear the list first: the nested fetch() calls
	// collect their own frame urls into $this->_frameurls.
	$frameurls = $this->_frameurls;
	$this->_frameurls = array();

	foreach ($frameurls as $frameurl)
	{
		if ($this->_framedepth >= $this->maxframes)
			break;

		$this->fetch($frameurl);
		$this->_framedepth++;
	}
}
/*======================================================================*
	Function:	submit
	Purpose:	submit an http form
	Input:		$URI	the location to post the data
				$formvars	the formvars to use.
					format: $formvars["var"] = "val";
				$formfiles	an array of files to submit
					format: $formfiles["var"] = "/dir/filename.ext";
	Output:		$this->results	the text output from the post
*======================================================================*/
functionsubmit($URI,$formvars="",$formfiles="")
{
unset($postdata);
$postdata=$this->_prepare_post_body($formvars,$formfiles);
$URI_PARTS=parse_url($URI);
if(!empty($URI_PARTS["user"]))
$this->user=$URI_PARTS["user"];
if(!empty($URI_PARTS["pass"]))
$this->pass=$URI_PARTS["pass"];
if(empty($URI_PARTS["query"]))
$URI_PARTS["query"]='';
if(empty($URI_PARTS["path"]))
$URI_PARTS["path"]='';
switch(strtolower($URI_PARTS["scheme"]))
{
case"http":
$this->host=$URI_PARTS["host"];
if(!empty($URI_PARTS["port"]))
$this->port=$URI_PARTS["port"];
if($this->_connect($fp))
{
if($this->_isproxy)
{
//usingproxy,sendentireURI
$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
}
else
{
$path=$URI_PARTS["path"].($URI_PARTS["query"]?"?".$URI_PARTS["query"]:"");
//noproxy,sendonlythepath<b |
|