设为首页 收藏本站
查看: 528|回复: 0

[经验分享] [转] 贴Snoopy.class.php代码学习参考

[复制链接]

尚未签到

发表于 2017-4-10 10:04:46 | 显示全部楼层 |阅读模式
  
Snoopy.class.php 是一个用于 HTTP 协议访问操作的类库,主要在 MagpieRSS 中用于抓取远程文件。我之前转载的一篇文章对它做过简单介绍,今天闲来无事,把代码贴出来,供大家参考学习。
<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01

* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

************************************************
*/

classSnoopy
{
/**** Public variables ****/

/* user definable vars */

// host name we are connecting to
var $host = "www.php.net";
// port we are connecting to
var $port = 80;
// proxy host to use
var $proxy_host = "";
// proxy port to use
var $proxy_port = "";
// proxy user to use
var $proxy_user = "";
// proxy password to use
var $proxy_pass = "";

// agent we masquerade as
var $agent = "Snoopyv1.2.3";
// referer info to pass
var $referer = "";
// array of cookies to pass, e.g. $cookies["username"] = "joe";
var $cookies = array();

// array of raw headers to send, e.g. $rawheaders["Content-type"] = "text/html";
var $rawheaders = array();

// http redirection depth maximum. 0 = disallow
var $maxredirs = 5;
// contains address of last redirected address
var $lastredirectaddr = "";
// allows redirection off-site
var $offsiteok = true;
// frame content depth maximum. 0 = disallow
var $maxframes = 0;
// expand links to fully qualified URLs.
// this only applies to fetchlinks(), submitlinks(), and submittext()
var $expandlinks = true;

// pass set cookies back through redirects
// NOTE: this currently does not respect dates, domains or paths.
var $passcookies = true;

// user for http authentication
var $user = "";
// password for http authentication
var $pass = "";

// http accept types
var $accept = "image/gif,image/x-xbitmap,image/jpeg,image/pjpeg,*/*";

// where the content is put
var $results = "";

// error messages sent here
var $error = "";
// response code returned from server
var $response_code = "";
// headers returned from server sent here
var $headers = array();
// max return data length (body)
var $maxlength = 500000;
// timeout on read operations, in seconds.
// supported only since PHP 4 Beta 4; set to 0 to disallow timeouts
var $read_timeout = 0;

// if a read operation timed out
var $timed_out = false;
// http request status
var $status = 0;

// temporary directory that the webserver has permission to write to.
// under Windows, this should be C:\temp
var $temp_dir = "/tmp";

// Snoopy will use cURL for fetching SSL content if a full system path to
// the cURL binary is supplied here. Set to false if you do not have cURL
// installed. See http://curl.haxx.se for details on installing cURL.
// Snoopy does *not* use the cURL library functions built into php, as
// these functions are not stable as of this Snoopy release.
var $curl_path = "/usr/local/bin/curl";

/**** Private variables ****/

// max line length (headers)
var $_maxlinelen = 4096;

// default http request method
var $_httpmethod = "GET";
// default http request version
var $_httpversion = "HTTP/1.0";
// default submit method
var $_submit_method = "POST";
// default submit type
var $_submit_type = "application/x-www-form-urlencoded";
// MIME boundary for multipart/form-data submit type
var $_mime_boundary = "";
// will be set if page fetched is a redirect
var $_redirectaddr = false;
// increments on an http redirect
var $_redirectdepth = 0;
// frame src urls
var $_frameurls = array();
// increments on frame depth
var $_framedepth = 0;

// set if using a proxy server
var $_isproxy = false;
// timeout for socket connection
var $_fp_timeout = 30;

/*======================================================================*
Function:   fetch
Purpose:    fetch the contents of a web page over http or https
            (and possibly other protocols in the future like ftp,
            nntp, gopher, etc.)
Input:      $URI    the location of the page to fetch
Output:     returns true once the request has been issued, false when
            the URI has no usable scheme/host, the http connection
            could not be opened, or https was requested without an
            executable cURL binary in $this->curl_path.
            $this->error is set for an invalid protocol or missing host.
            $this->results is expected to hold the fetched text; it is
            filled in by the request helpers, which are not part of
            this function.
Notes:      follows redirects up to $this->maxredirs and fetches frame
            URLs up to $this->maxframes by calling itself recursively;
            the return value of those nested calls is ignored.
*======================================================================
*/

function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
    {
        // parse_url() returns false on seriously malformed URLs; treat
        // that as "nothing parsed" so it ends in the invalid-protocol error
        $URI_PARTS = array();
    }

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // isset() rather than empty(): a query string of "0" is legitimate
    $query  = isset($URI_PARTS["query"]) ? $URI_PARTS["query"] : '';
    $path   = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : '';
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

    $scheme_lc = strtolower($scheme);
    if ($scheme_lc != "http" && $scheme_lc != "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "' . $scheme . '" ';
        return false;
    }
    $is_https = ($scheme_lc == "https");

    if ($is_https)
    {
        // https needs a usable cURL binary; bail out before touching
        // any connection state if it is not available
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    if (empty($URI_PARTS["host"]))
    {
        $this->error = 'Invalid URI "' . $URI . '": missing host';
        return false;
    }

    $this->host = $URI_PARTS["host"];
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    if ($this->_isproxy)
    {
        // using proxy, send entire URI
        $request_target = $URI;
    }
    else
    {
        // no proxy, send only the path (plus query string, if any)
        $request_target = $path . ($query !== '' ? "?" . $query : "");
    }

    if ($is_https)
    {
        $this->_httpsrequest($request_target, $URI, $this->_httpmethod);
    }
    else
    {
        if (!$this->_connect($fp))
            return false;

        $this->_httprequest($request_target, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // Same-site means http:// or https:// followed by exactly this
        // host; the trailing group stops "host.evil.com" from passing
        // as "host".
        $same_site = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($same_site || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy and clear the member so nested fetches start
        // from an empty frame list
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}

/*======================================================================*
Function:   submit
Purpose:    submit an http form
Input:      $URI        the location to post the data
            $formvars   the formvars to use.
                        format: $formvars["var"] = "val";
            $formfiles  an array of files to submit
                        format: $formfiles["var"] = "/dir/filename.ext";
Output:     $this->results  the text output from the post
*======================================================================
*/

functionsubmit($URI,$formvars="",$formfiles="")
{
unset($postdata);

$postdata=$this->_prepare_post_body($formvars,$formfiles);

$URI_PARTS=parse_url($URI);
if(!empty($URI_PARTS["user"]))
$this->user=$URI_PARTS["user"];
if(!empty($URI_PARTS["pass"]))
$this->pass=$URI_PARTS["pass"];
if(empty($URI_PARTS["query"]))
$URI_PARTS["query"]='';
if(empty($URI_PARTS["path"]))
$URI_PARTS["path"]='';

switch(strtolower($URI_PARTS["scheme"]))
{
case"http":
$this->host=$URI_PARTS["host"];
if(!empty($URI_PARTS["port"]))
$this->port=$URI_PARTS["port"];
if($this->_connect($fp))
{
if($this->_isproxy)
{
//usingproxy,sendentireURI
$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
}
else
{
$path=$URI_PARTS["path"].($URI_PARTS["query"]?"?".$URI_PARTS["query"]:"");
//noproxy,sendonlythepath<b

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-362761-1-1.html 上篇帖子: [分享]使用 PHP 監測伺服器狀態 下篇帖子: 使用PHP对非法内容进行检查
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表