php下curl模拟百度蜘蛛进行采集

xy123321 · 发表于 2018-12-18 10:33:33

/**
 * Download a page body with cURL while presenting the request as Baiduspider.
 *
 * The request carries the Baiduspider User-Agent string plus X-FORWARDED-FOR
 * and CLIENT-IP headers set to a fixed crawler address. These are plain
 * request headers only; the connection's real source address is unchanged.
 *
 * Side effects: overwrites $this->ch, $this->ip, $this->timeout and
 * $this->succ on every call; on failure also sets $this->error and echoes it.
 * The cURL handle is left open in $this->ch because code outside this method
 * may still read it.
 *
 * @author Yusure
 *
 * @param string $url Address of the page to fetch.
 *
 * @return string|null Response body on success, null on any failure.
 */
private function _GetContent( $url )
{
    // Reset the flag first so a failure never leaves a stale "true" behind
    // from an earlier successful call.
    $this->succ = false;

    $this->ip = '220.181.108.91';  // address advertised as the Baidu spider
    $this->timeout = 15;           // seconds

    $this->ch = curl_init();
    if ($this->ch === false) {
        $this->error = 'cURL初始化失败';
        echo $this->error;

        return null;
    }

    curl_setopt_array($this->ch, array(
        CURLOPT_URL            => $url,
        // Bound the whole transfer, not just the connect phase; a value of 0
        // would disable the limit and let a stalled server block forever.
        CURLOPT_TIMEOUT        => $this->timeout,
        CURLOPT_CONNECTTIMEOUT => $this->timeout,
        // Forwarding headers that claim the spider's address.
        CURLOPT_HTTPHEADER     => array(
            'X-FORWARDED-FOR:' . $this->ip,
            'CLIENT-IP:' . $this->ip,
        ),
        // User-Agent string of the Baidu spider.
        CURLOPT_USERAGENT      => "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
        // Return the body as a string and leave response headers out of it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER         => false,
    ));

    $content = curl_exec($this->ch);

    if ($content === false) {
        // Timeouts get a friendly message; everything else uses cURL's own text.
        if (curl_errno($this->ch) === CURLE_OPERATION_TIMEDOUT) {
            $this->error = '访问目标地址超时';
        } else {
            $this->error = curl_error($this->ch);
        }

        echo $this->error;

        return null;
    }

    $this->succ = true;

    return $content;
}

账号		自动登录	找回密码
密码			立即注册

wirelessnetview好用的无线分析工具

Red Hat RHCE 8 (EX294) Cert Guide

Shell从入门到精通（阿良）

亿图图示专家(EDraw Max) V7.9 中文破解版

zabbix3.4.1安装部署+微信推送信息+大屏显

Red Hat OpenShift I: Containers & Kubern

2025 年，C++ 还能“硬核”多久？

[经验分享] php下curl模拟百度蜘蛛进行采集

浏览过的版块

扫码加入运维网微信交流群