设为首页 收藏本站
查看: 627|回复: 0

[经验分享] PHP采集器 基于yii

[复制链接]

尚未签到

发表于 2015-8-25 11:50:52 | 显示全部楼层 |阅读模式
<?php
/**
 * CRUD controller for CjRenwuResult records.
 *
 * Provides the usual list / view / create / update / delete / admin actions.
 * Every action requires an authenticated user (see accessRules()).
 */
class CjRenwuResultController extends Controller
{
    /**
     * @var string the default layout for the views rendered by this controller.
     */
    public $layout='//layouts/lv_column2';

    /**
     * @return array action filters
     */
    public function filters()
    {
        return array(
            'accessControl',     // perform access control for CRUD operations
            'postOnly + delete', // we only allow deletion via POST request
        );
    }

    /**
     * Specifies the access control rules.
     * This method is used by the 'accessControl' filter.
     *
     * 'index' and 'view' are part of the allowed list because actionCreate()
     * and actionUpdate() redirect to 'view' after a successful save; without
     * them the trailing deny-all rule turns every successful save into a 403.
     *
     * @return array access control rules
     */
    public function accessRules()
    {
        return array(
            array('allow', // authenticated users may perform every action of this controller
                'actions'=>array('index','view','create','update','admin','delete'),
                'users'=>array('@'),
            ),
            array('deny',  // deny all users
                'users'=>array('*'),
            ),
        );
    }

    /**
     * Displays a particular model.
     * @param integer $id the ID of the model to be displayed
     */
    public function actionView($id)
    {
        $this->render('view',array(
            'model'=>$this->loadModel($id),
        ));
    }

    /**
     * Creates a new model.
     * If creation is successful, the browser will be redirected to the 'view' page.
     */
    public function actionCreate()
    {
        $model=new CjRenwuResult;

        // Uncomment the following line if AJAX validation is needed
        // $this->performAjaxValidation($model);

        if(isset($_POST['CjRenwuResult']))
        {
            $model->attributes=$_POST['CjRenwuResult'];
            if($model->save())
                $this->redirect(array('view','id'=>$model->id));
        }

        // First visit, or validation failed: (re)display the form.
        $this->render('create',array(
            'model'=>$model,
        ));
    }

    /**
     * Updates a particular model.
     * If update is successful, the browser will be redirected to the 'view' page.
     * @param integer $id the ID of the model to be updated
     */
    public function actionUpdate($id)
    {
        $model=$this->loadModel($id);

        // Uncomment the following line if AJAX validation is needed
        // $this->performAjaxValidation($model);

        if(isset($_POST['CjRenwuResult']))
        {
            $model->attributes=$_POST['CjRenwuResult'];
            if($model->save())
                $this->redirect(array('view','id'=>$model->id));
        }

        // First visit, or validation failed: (re)display the form.
        $this->render('update',array(
            'model'=>$model,
        ));
    }

    /**
     * Deletes a particular model.
     * If deletion is successful, the browser will be redirected to the 'admin' page
     * (or to the posted 'returnUrl' when one is supplied).
     * @param integer $id the ID of the model to be deleted
     */
    public function actionDelete($id)
    {
        $this->loadModel($id)->delete();

        // if AJAX request (triggered by deletion via admin grid view), we should not redirect the browser
        if(!isset($_GET['ajax']))
            $this->redirect(isset($_POST['returnUrl']) ? $_POST['returnUrl'] : array('admin'));
    }

    /**
     * Lists all models.
     */
    public function actionIndex()
    {
        $dataProvider=new CActiveDataProvider('CjRenwuResult');
        $this->render('index',array(
            'dataProvider'=>$dataProvider,
        ));
    }

    /**
     * Manages all models.
     * Search filters are taken from the 'CjRenwuResult' GET parameter.
     */
    public function actionAdmin()
    {
        $model=new CjRenwuResult('search');
        $model->unsetAttributes();  // clear any default values
        if(isset($_GET['CjRenwuResult']))
            $model->attributes=$_GET['CjRenwuResult'];

        $this->render('admin',array(
            'model'=>$model,
        ));
    }

    /**
     * Returns the data model based on the primary key given in the GET variable.
     * If the data model is not found, an HTTP exception will be raised.
     * @param integer $id the ID of the model to be loaded
     * @return CjRenwuResult the loaded model
     * @throws CHttpException with status 404 when no row has that primary key
     */
    public function loadModel($id)
    {
        $model=CjRenwuResult::model()->findByPk($id);
        if($model===null)
            throw new CHttpException(404,'The requested page does not exist.');
        return $model;
    }

    /**
     * Performs the AJAX validation.
     * Only reacts when the posted 'ajax' field names the 'cj-renwu-result-form' form;
     * in that case the validation result is echoed and the application ends.
     * @param CjRenwuResult $model the model to be validated
     */
    protected function performAjaxValidation($model)
    {
        if(isset($_POST['ajax']) && $_POST['ajax']==='cj-renwu-result-form')
        {
            echo CActiveForm::validate($model);
            Yii::app()->end();
        }
    }
}

  
  控制器分4个文件
  CjRenwuController.php



<?php
/**
 * CRUD controller for CjRenwu (scraping task) records.
 *
 * Besides the usual CRUD actions it offers two helpers for configuring a task:
 *  - actionGetyuanma(): downloads a page and shows its source;
 *  - actionUpdate() with POST ceshi=1: dry-runs the posted link-extraction
 *    rules and prints the content-page URLs they would produce.
 */
class CjRenwuController extends Controller
{
    /**
     * @var string the default layout for the views rendered by this controller.
     */
    public $layout='//layouts/lv_column2';

    /**
     * @return array action filters
     */
    public function filters()
    {
        return array(
            'accessControl',     // perform access control for CRUD operations
            'postOnly + delete', // we only allow deletion via POST request
        );
    }

    /**
     * Specifies the access control rules used by the 'accessControl' filter.
     * @return array access control rules
     */
    public function accessRules()
    {
        return array(
            array('allow',  // allow all users to perform 'index' and 'view' actions
                'actions'=>array('index','view'),
                'users'=>array('*'),
            ),
            array('allow', // allow authenticated users to create/update/admin and fetch page source
                'actions'=>array('create','update','admin','getyuanma'),
                'users'=>array('@'),
            ),
            array('allow', // allow the 'admin' user to perform the 'delete' action
                'actions'=>array('delete'),
                'users'=>array('admin'),
            ),
            array('deny',  // deny all users
                'users'=>array('*'),
            ),
        );
    }

    /**
     * Downloads the page at POST 'url' and renders its source in the 'getyuanma' view.
     *
     * When POST 'bianma' equals 'GB2312' the page is converted from GB2312 to UTF-8.
     * Without a posted URL the view is rendered with an empty string.
     */
    public function actionGetyuanma()
    {
        $this->layout='//layouts/lv_getyuanma';

        // Remote fetches may be slow or fail noisily; keep the original
        // best-effort behaviour of hiding warnings and lifting the time limit.
        error_reporting(0);
        set_time_limit(0);

        $neirong='';
        // isset($_POST) is always true, so test for the field that is actually needed.
        if(isset($_POST['url']))
        {
            $bianma=isset($_POST['bianma']) ? $_POST['bianma'] : '';
            $neirong=$this->fetchPage($_POST['url'], $bianma==='GB2312');
        }

        $this->render('getyuanma',array(
            'neirong'=>$neirong,
        ));
    }

    /**
     * Displays a particular model.
     * @param integer $id the ID of the model to be displayed
     */
    public function actionView($id)
    {
        $this->render('view',array(
            'model'=>$this->loadModel($id),
        ));
    }

    /**
     * Creates a new model.
     * If creation is successful, the browser will be redirected to the 'admin' page.
     */
    public function actionCreate()
    {
        $model=new CjRenwu;

        if(isset($_POST['CjRenwu']))
        {
            $model->attributes=$_POST['CjRenwu'];
            if($model->save())
                $this->redirect(array('admin'));
        }

        $this->render('create',array(
            'model'=>$model,
        ));
    }

    /**
     * Updates a particular model, or dry-runs its link-extraction rules.
     *
     * When POST 'ceshi' equals 1 nothing is saved: the posted rules are run
     * against the posted list pages and the resulting URLs are printed.
     * Otherwise the posted attributes are saved and the browser is redirected
     * back to this 'update' page.
     *
     * @param integer $id the ID of the model to be updated
     */
    public function actionUpdate($id)
    {
        $model=$this->loadModel($id);
        $form=(isset($_POST['CjRenwu']) && is_array($_POST['CjRenwu'])) ? $_POST['CjRenwu'] : null;

        if(isset($_POST['ceshi']) && $_POST['ceshi']==1)
        {
            $this->previewContentUrls($form===null ? array() : $form);
            return;
        }

        if($form!==null)
        {
            $model->attributes=$form;
            if($model->save())
                $this->redirect(array('update','id'=>$model->id));
        }

        $this->render('update',array(
            'model'=>$model,
        ));
    }

    /**
     * Deletes a particular model.
     * If deletion is successful, the browser will be redirected to the 'admin' page
     * (or to the posted 'returnUrl' when one is supplied).
     * @param integer $id the ID of the model to be deleted
     */
    public function actionDelete($id)
    {
        $this->loadModel($id)->delete();

        // if AJAX request (triggered by deletion via admin grid view), we should not redirect the browser
        if(!isset($_GET['ajax']))
            $this->redirect(isset($_POST['returnUrl']) ? $_POST['returnUrl'] : array('admin'));
    }

    /**
     * Lists all models.
     */
    public function actionIndex()
    {
        $dataProvider=new CActiveDataProvider('CjRenwu');
        $this->render('index',array(
            'dataProvider'=>$dataProvider,
        ));
    }

    /**
     * Manages all models.
     * Search filters are taken from the 'CjRenwu' GET parameter.
     */
    public function actionAdmin()
    {
        $model=new CjRenwu('search');
        $model->unsetAttributes();  // clear any default values
        if(isset($_GET['CjRenwu']))
            $model->attributes=$_GET['CjRenwu'];

        $this->render('admin',array(
            'model'=>$model,
        ));
    }

    /**
     * Returns the data model based on the primary key given in the GET variable.
     * If the data model is not found, an HTTP exception will be raised.
     * @param integer $id the ID of the model to be loaded
     * @return CjRenwu the loaded model
     * @throws CHttpException with status 404 when no row has that primary key
     */
    public function loadModel($id)
    {
        $model=CjRenwu::model()->findByPk($id);
        if($model===null)
            throw new CHttpException(404,'The requested page does not exist.');
        return $model;
    }

    /**
     * Performs the AJAX validation.
     * Only reacts when the posted 'ajax' field names the 'cj-renwu-form' form.
     * @param CjRenwu $model the model to be validated
     */
    protected function performAjaxValidation($model)
    {
        if(isset($_POST['ajax']) && $_POST['ajax']==='cj-renwu-form')
        {
            echo CActiveForm::validate($model);
            Yii::app()->end();
        }
    }

    /**
     * Prints the content-page URLs that the posted extraction rules produce.
     *
     * Output is written directly with echo (no view is rendered). Every value
     * that originates from the form or from a scraped page is HTML-escaped.
     *
     * @param array $form the posted 'CjRenwu' fields; missing fields fall back to defaults
     */
    private function previewContentUrls(array $form)
    {
        $form=$form+array(
            'url'=>'',          // list-page URLs, one per line
            'url2'=>'',         // list-page URL template containing {xxq}
            'url2_start'=>0,    // first number substituted for {xxq}
            'url2_end'=>0,      // last number substituted for {xxq}
            'start'=>'',        // start marker of the link region
            'end'=>'',          // end marker of the link region
            'tiquguize'=>'',    // rule matching content links inside the region
            'dijige'=>0,        // which capture group of the rule holds the link URL
            'urlqianzhui'=>'',  // prefix prepended to every extracted URL
            'bianma'=>'',       // page encoding
        );
        $fromGb2312=($form['bianma']=='gb2312');

        echo '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        foreach($this->buildListUrls($form['url'],$form['url2'],$form['url2_start'],$form['url2_end']) as $listUrl)
        {
            echo "<br>";
            echo "<font color=red>正在分析列表页:</font>".htmlspecialchars($listUrl,ENT_QUOTES,'UTF-8');
            echo "<br>";

            $html=$this->fetchPage($listUrl,$fromGb2312);
            $links=$this->extractGroup($html,$form['start'],$form['end'],$form['tiquguize'],$form['dijige']);
            foreach($links as $link)
            {
                echo "<br>";
                echo "得到的内容页地址是:".htmlspecialchars($form['urlqianzhui'].$link,ENT_QUOTES,'UTF-8');
            }
        }
    }

    /**
     * Builds the list of list-page URLs: the explicit lines first, then the
     * URLs generated from the {xxq} template.
     *
     * Lines are split on any newline style (form textareas submit CRLF) and
     * blank lines are dropped.
     *
     * @param string $url explicit URLs, one per line
     * @param string $url2 URL template in which {xxq} is replaced by a number
     * @param mixed $from first number of the range (cast to int)
     * @param mixed $to last number of the range, inclusive (cast to int)
     * @return string[] the list-page URLs, possibly empty
     */
    private function buildListUrls($url,$url2,$from,$to)
    {
        $urls=array();
        foreach(preg_split('/\r\n|\r|\n/',(string)$url) as $line)
        {
            $line=trim($line);
            if($line!=='')
                $urls[]=$line;
        }

        $url2=trim((string)$url2);
        if($url2!=='')
        {
            $last=(int)$to;
            for($page=(int)$from;$page<=$last;$page++)
                $urls[]=str_replace('{xxq}',(string)$page,$url2);
        }
        return $urls;
    }

    /**
     * Downloads a page over http(s), optionally converting it from GB2312 to UTF-8.
     *
     * Only http:// and https:// URLs are fetched so that a user-supplied value
     * cannot be used to read local files through file_get_contents().
     *
     * @param mixed $url the address to download
     * @param boolean $fromGb2312 whether to convert the body from GB2312 to UTF-8
     * @return string the page body, or '' when the URL is rejected or the download fails
     */
    private function fetchPage($url,$fromGb2312)
    {
        if(!is_string($url))
            return '';
        $url=trim($url);
        if(!preg_match('#^https?://#i',$url))
            return '';

        $body=file_get_contents($url);
        if($body===false)
            return '';
        if(!$fromGb2312)
            return $body;

        $converted=iconv('GB2312','UTF-8//IGNORE',$body);
        return $converted===false ? '' : $converted;
    }

    /**
     * Extracts one capture group of $rule from the first region of $html
     * delimited by $start and $end.
     *
     * $start, $end and $rule are inserted verbatim into '/.../is' patterns, so
     * they are regular-expression fragments and a literal '/' in them must be
     * escaped by whoever enters them.
     *
     * @param string $html the page body
     * @param string $start pattern fragment marking the start of the region
     * @param string $end pattern fragment marking the end of the region
     * @param string $rule pattern fragment matched repeatedly inside the region
     * @param mixed $group index of the capture group to return
     * @return string[] the captured values in match order; empty when nothing matches
     */
    private function extractGroup($html,$start,$end,$rule,$group)
    {
        // Only the first region is used, exactly as the former $pipei[0][0] did.
        if(!preg_match('/'.$start.'(.*?)'.$end.'/is',(string)$html,$region))
            return array();
        if(!preg_match_all('/'.$rule.'/is',$region[0],$found))
            return array();
        return isset($found[$group]) ? $found[$group] : array();
    }
}
  CjRenwuResultController.php
  核心文件CjCenterController.php



<?php
/**
 * Runs the scraping tasks defined as CjRenwu records.
 *
 * For one task it can: collect content-page URLs from the task's list pages
 * (actionGeturl), extract the tagged fields of every pending URL into
 * CjRenwuResult rows (actionTiquruku), and delete the collected URLs or
 * results again (actionDeleteurl / actionDeleteku).
 *
 * NOTE(review): the 'postOnly + delete' filter names an action this controller
 * does not define; actionDeleteurl/actionDeleteku are therefore not limited to
 * POST. Confirm whether the views link to them with GET before tightening this.
 */
class CjCenterController extends Controller
{
    /**
     * @var string the default layout for the views rendered by this controller.
     */
    public $layout='//layouts/lv_column2';

    /**
     * @return array action filters
     */
    public function filters()
    {
        return array(
            'accessControl',     // perform access control for CRUD operations
            'postOnly + delete', // we only allow deletion via POST request
        );
    }

    /**
     * Specifies the access control rules used by the 'accessControl' filter.
     * actionCeshi2() is not listed, so the deny-all rule blocks it.
     * @return array access control rules
     */
    public function accessRules()
    {
        return array(
            array('allow', // authenticated users only
                'actions'=>array('index','geturl','tiquruku','deleteurl','deleteku','ceshi'),
                'users'=>array('@'),
            ),
            array('deny',  // deny all users
                'users'=>array('*'),
            ),
        );
    }

    /**
     * Lists all tasks, newest id first.
     */
    public function actionIndex()
    {
        $data_renwu=CjRenwu::model()->findAll(array('order'=>'id desc'));
        $this->render('index',array(
            'data_renwu'=>$data_renwu,
        ));
    }

    /**
     * Collects the content-page URLs of a task and stores them as CjRenwuUrl rows.
     *
     * Every list page of the task is downloaded, the link region is located and
     * the configured capture group of each link match (prefixed with the task's
     * URL prefix) is saved. The browser is then redirected to the index page.
     *
     * @param integer $renwu_id the ID of the task
     * @throws CHttpException 404 when the task does not exist, 400 when it has no list-page URL
     */
    public function actionGeturl($renwu_id)
    {
        $task=$this->loadModel_renwu($renwu_id);
        $bianma=$task->bianma;

        $listUrls=$this->buildListUrls($task->url,$task->url2,$task->url2_start,$task->url2_end);
        if(count($listUrls)===0)
            throw new CHttpException(400,'采集地址为空!');

        foreach($listUrls as $listUrl)
        {
            $html=$this->fetchPage($listUrl,$bianma=='gb2312');
            $links=$this->extractGroup($html,$task->start,$task->end,$task->tiquguize,$task->dijige);
            foreach($links as $link)
            {
                $record=new CjRenwuUrl;
                $record->url=$task->urlqianzhui.$link;
                $record->renwu_id=$renwu_id;
                $record->bianma=$bianma;
                $record->inputtime=time();
                $record->inputtime2=date('H:i Y-m-d',time());
                $record->save();
            }
        }

        // Redirect once, after all list pages were processed.
        $this->redirect(array('CjCenter/index'));
    }

    /**
     * Deletes every collected URL of a task, then redirects to the index page.
     * @param integer $renwu_id the ID of the task
     */
    public function actionDeleteurl($renwu_id)
    {
        foreach(CjRenwuUrl::model()->findAll($this->taskCriteria($renwu_id)) as $record)
            $record->delete();

        $this->redirect(array('CjCenter/index'));
    }

    /**
     * Extracts the tagged fields from every pending URL of a task.
     *
     * For each CjRenwuUrl row of the task with caileme=0 the page is downloaded
     * once, every CjRenwuBiaoqian tag of the task is applied to it, and the
     * values are stored in one CjRenwuResult row as
     * "name:::value^^^name:::value...". The URL row is then marked caileme=1.
     * The browser is redirected to the index page afterwards, also when
     * nothing was pending.
     *
     * @param integer $renwu_id the ID of the task
     * @throws CHttpException 404 when the task does not exist
     */
    public function actionTiquruku($renwu_id)
    {
        $task=$this->loadModel_renwu($renwu_id);
        $fromGb2312=($task->bianma=='gb2312');

        // Tags (field extraction rules) of this task.
        $tagFinder=new CjRenwuBiaoqian;
        $tags=$tagFinder->findAll($this->taskCriteria($renwu_id));

        // URLs of this task that have not been scraped yet.
        $pendingUrls=CjRenwuUrl::model()->findAll($this->taskCriteria($renwu_id,'t.caileme=0'));

        foreach($pendingUrls as $pendingUrl)
        {
            $pageUrl=$pendingUrl->url;
            // The page is the same for every tag, so download it once per URL.
            $html=$this->fetchPage($pageUrl,$fromGb2312);

            $fields=array();
            foreach($tags as $tag)
            {
                $matches=$this->extractGroup($html,$tag->bq_start,$tag->bq_end,$tag->bq_guize,$tag->bq_dijige);
                $text=isset($matches[0]) ? $matches[0] : '';
                $text=$this->applyReplacements($text,$tag->bq_tihuan);
                $fields[]=$tag->bq_name.':::'.trim($text);
            }

            // Store the extracted fields.
            $result=new CjRenwuResult;
            $result->content=implode('^^^',$fields);
            $result->renwu_id=$renwu_id;
            $result->url=$pageUrl;
            $result->inputtime=time();
            $result->inputtime2=date('H:i Y-m-d',time());
            $result->save();

            // Mark the URL as scraped.
            $pendingUrl->caileme=1;
            $pendingUrl->save();
        }

        $this->redirect(array('CjCenter/index'));
    }

    /**
     * Deletes every stored result of a task, then redirects to the index page.
     * @param integer $renwu_id the ID of the task
     */
    public function actionDeleteku($renwu_id)
    {
        foreach(CjRenwuResult::model()->findAll($this->taskCriteria($renwu_id)) as $record)
            $record->delete();

        $this->redirect(array('CjCenter/index'));
    }

    /**
     * Loads a CjRenwu task by primary key.
     * @param integer $id the ID of the model to be loaded
     * @return CjRenwu the loaded model
     * @throws CHttpException with status 404 when no row has that primary key
     */
    public function loadModel_renwu($id)
    {
        $model=CjRenwu::model()->findByPk($id);
        if($model===null)
            throw new CHttpException(404,'The requested page does not exist.');
        return $model;
    }

    /**
     * Loads a CjRenwuUrl row by primary key.
     * @param integer $id the ID of the model to be loaded
     * @return CjRenwuUrl the loaded model
     * @throws CHttpException with status 404 when no row has that primary key
     */
    public function loadModel_renwuurl($id)
    {
        $model=CjRenwuUrl::model()->findByPk($id);
        if($model===null)
            throw new CHttpException(404,'The requested page does not exist.');
        return $model;
    }

    /**
     * Loads a CjRenwuResult row by primary key.
     * @param integer $id the ID of the model to be loaded
     * @return CjRenwuResult the loaded model
     * @throws CHttpException with status 404 when no row has that primary key
     */
    public function loadModel_renwuresult($id)
    {
        $model=CjRenwuResult::model()->findByPk($id);
        if($model===null)
            throw new CHttpException(404,'The requested page does not exist.');
        return $model;
    }

    /**
     * Renders the 'ceshi' view.
     */
    public function actionCeshi(){
        $this->render('ceshi');
    }

    /**
     * Debug action: resolves the two places of a hard-coded "<from>到<to>"
     * string against XxqArea and prints the province/city names and ids.
     */
    public function actionCeshi2(){
        $bq_content='山东全省到青海全省';
        $arr_city=explode('到',$bq_content);
        $from=$this->resolveArea($arr_city[0]); // departure
        $to=$this->resolveArea($arr_city[1]);   // destination

        echo $from['sheng_name'].'_'.$from['shi_name'].'>>'.$to['sheng_name'].'_'.$to['shi_name'];
        echo '<br>';
        echo $from['sheng_id'].'_'.$from['shi_id'].'>>'.$to['sheng_id'].'_'.$to['shi_id'];
    }

    /**
     * Builds the criteria selecting the rows of one task, newest id first.
     * The task id is bound as a parameter instead of being concatenated into SQL.
     *
     * @param mixed $renwu_id the ID of the task
     * @param string $extraCondition optional fixed SQL condition ANDed to the task filter
     * @return CDbCriteria
     */
    private function taskCriteria($renwu_id,$extraCondition='')
    {
        $criteria=new CDbCriteria();
        $criteria->condition='t.renwu_id=:renwu_id';
        if($extraCondition!=='')
            $criteria->condition.=' and '.$extraCondition;
        $criteria->params=array(':renwu_id'=>$renwu_id);
        $criteria->order='id desc';
        return $criteria;
    }

    /**
     * Builds the list of list-page URLs: the explicit lines first, then the
     * URLs generated from the {xxq} template.
     *
     * Lines are split on any newline style and blank lines are dropped.
     *
     * @param string $url explicit URLs, one per line
     * @param string $url2 URL template in which {xxq} is replaced by a number
     * @param mixed $from first number of the range (cast to int)
     * @param mixed $to last number of the range, inclusive (cast to int)
     * @return string[] the list-page URLs, possibly empty
     */
    private function buildListUrls($url,$url2,$from,$to)
    {
        $urls=array();
        foreach(preg_split('/\r\n|\r|\n/',(string)$url) as $line)
        {
            $line=trim($line);
            if($line!=='')
                $urls[]=$line;
        }

        $url2=trim((string)$url2);
        if($url2!=='')
        {
            $last=(int)$to;
            for($page=(int)$from;$page<=$last;$page++)
                $urls[]=str_replace('{xxq}',(string)$page,$url2);
        }
        return $urls;
    }

    /**
     * Downloads a page over http(s), optionally converting it from GB2312 to UTF-8.
     *
     * Only http:// and https:// URLs are fetched so that a stored value cannot
     * be used to read local files through file_get_contents().
     *
     * @param mixed $url the address to download
     * @param boolean $fromGb2312 whether to convert the body from GB2312 to UTF-8
     * @return string the page body, or '' when the URL is rejected or the download fails
     */
    private function fetchPage($url,$fromGb2312)
    {
        if(!is_string($url))
            return '';
        $url=trim($url);
        if(!preg_match('#^https?://#i',$url))
            return '';

        $body=file_get_contents($url);
        if($body===false)
            return '';
        if(!$fromGb2312)
            return $body;

        $converted=iconv('gb2312','utf-8//IGNORE',$body);
        return $converted===false ? '' : $converted;
    }

    /**
     * Extracts one capture group of $rule from the first region of $html
     * delimited by $start and $end.
     *
     * $start, $end and $rule are inserted verbatim into '/.../is' patterns, so
     * they are regular-expression fragments and a literal '/' in them must be
     * escaped by whoever configures the task.
     *
     * @param string $html the page body
     * @param string $start pattern fragment marking the start of the region
     * @param string $end pattern fragment marking the end of the region
     * @param string $rule pattern fragment matched repeatedly inside the region
     * @param mixed $group index of the capture group to return
     * @return string[] the captured values in match order; empty when nothing matches
     */
    private function extractGroup($html,$start,$end,$rule,$group)
    {
        // Only the first region is used, exactly as the former $pipei[0][0] did.
        if(!preg_match('/'.$start.'(.*?)'.$end.'/is',(string)$html,$region))
            return array();
        if(!preg_match_all('/'.$rule.'/is',$region[0],$found))
            return array();
        return isset($found[$group]) ? $found[$group] : array();
    }

    /**
     * Applies "search|replacement" rules, one per line, to a text.
     *
     * A rule without '|' removes its search text. Lines are split on any
     * newline style so a stored CRLF does not leak into the replacement.
     *
     * @param string $text the text to transform
     * @param string $rules the replacement rules, one per line
     * @return string the transformed text
     */
    private function applyReplacements($text,$rules)
    {
        if((string)$rules==='')
            return $text;

        foreach(preg_split('/\r\n|\r|\n/',(string)$rules) as $rule)
        {
            $pair=explode('|',$rule);
            if($pair[0]==='')
                continue; // an empty search string replaces nothing
            $text=str_replace($pair[0],isset($pair[1]) ? $pair[1] : '',$text);
        }
        return $text;
    }

    /**
     * Resolves a place name to its province and city in XxqArea.
     *
     * A name containing '全省' is looked up as a province and yields no city.
     * Any other name is looked up as a city, and its province is found through
     * the code made of the first two characters of the city's 'daima' plus '0000'.
     *
     * @param string $name the place name
     * @return array keys sheng_name, sheng_id, shi_name, shi_id; '' for anything not found
     */
    private function resolveArea($name)
    {
        $area=array('sheng_name'=>'','sheng_id'=>'','shi_name'=>'','shi_id'=>'');

        $wholeProvince=(strpos($name,'全省')!==false);
        if($wholeProvince)
            $name=str_replace('全省','',$name);

        $finder=new XxqArea;
        $criteria=new CDbCriteria;
        $criteria->condition="t.areaname like :name and t.daima like '%00%'";
        $criteria->params=array(':name'=>'%'.$name.'%');
        $match=$finder->find($criteria);
        if($match===null)
            return $area;

        if($wholeProvince)
        {
            $area['sheng_name']=$match->areaname;
            $area['sheng_id']=$match->areaid;
            return $area;
        }

        $area['shi_name']=$match->areaname;
        $area['shi_id']=$match->areaid;

        // Look up the province the city belongs to.
        $finder=new XxqArea;
        $criteria=new CDbCriteria;
        $criteria->condition='t.daima=:daima';
        $criteria->params=array(':daima'=>substr($match->daima,0,2).'0000');
        $province=$finder->find($criteria);
        if($province!==null)
        {
            $area['sheng_name']=$province->areaname;
            $area['sheng_id']=$province->areaid;
        }
        return $area;
    }
}
  
  www.zgline.com

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-103996-1-1.html 上篇帖子: [转]PHP函数的实现原理及性能分析 下篇帖子: 变量
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表