php爬虫 phpspider
<?php/**
* Created by PhpStorm.
* User: brady
* Date: 2016/12/9
* Time: 17:32
*/
ini_set("memory_limit", "1024M");
require dirname(__FILE__).'/../core/init.php';
$url = "http://www.epooll.com/archives/806/";
$html = requests::get($url);
// 抽取文章标题
$selector = "//*[@id=\"content\"]/div/div/h1/a";
$title = selector::select($html, $selector);
// 检查是否抽取到标题
// 抽取文章作者
$selector = "//*[@id=\"content\"]/div/div/h6/span";
$author = selector::select($html, $selector);
// 检查是否抽取到作者
// 去掉 作者:
$author = str_replace("作者:", "", $author);
//发布时间
$selector = "//*[@id=\"content\"]/div/div/h6/span";
$time = selector::select($html, $selector);
$time = str_replace("发布时间:",'', $time);
$time= date("Y-m-d H:i:s",strtotime($time));
// 抽取文章内容
$selector = "//*[@id=\"content\"]/div/div";
$content = selector::select($html, $selector);
// 检查是否抽取到内容
$data = array(
'article_title' => $title,
'article_author' => $author,
'article_content' => $content,
);
// 查看数据是否正常
$res = db::insert("content", $data);
var_dump($res);
页:
[1]