|
➜ /test sudo vim test.py
#!/usr/bin/python
#-*- coding:utf-8 -*-
import sys
# Python 2 only: reload sys so that sys.setdefaultencoding is reachable again.
reload(sys)
# After start-up, Python 2.5 deletes the sys.setdefaultencoding method, so the
# module has to be reloaded (see the call above) before it can be used.
# Setting UTF-8 here makes implicit str <-> unicode conversions use UTF-8.
sys.setdefaultencoding('utf-8')
def weather():
    """Fetch the Shantou weather page from tianqi.moji.com and print its summary.

    The page is downloaded with a browser-like User-Agent header, the content
    of its ``<meta name="description">`` tag is extracted, and that text is
    printed below a fixed heading line. The types of the extracted text and of
    the heading are printed first (debug output).

    Written for Python 2 (uses ``urllib2``). As a side effect, the opener
    carrying the User-Agent header is installed as the global urllib2 opener.

    Raises:
        ValueError: if the downloaded page contains no description meta tag.
    """
    import re
    import urllib2

    # Pretend to be a desktop browser.
    user_agent = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36")
    # Address of the weather forecast page to scrape.
    url = "https://tianqi.moji.com/weather/china/guangdong/shantou"
    # Regular expression for the wanted content; group 2 is the description.
    pattern = '(<meta name="description" content=")(.*?)(">)'

    # Create the opener object and install it as the global opener.
    opener = urllib2.build_opener()
    opener.addheaders = [user_agent]
    urllib2.install_opener(opener)

    # Download the page; always close the response, even if reading fails.
    response = urllib2.urlopen(url)
    try:
        html = response.read().decode("utf-8")
    finally:
        response.close()

    # Extract the wanted text, failing clearly when the tag is missing
    # instead of raising AttributeError on a None match.
    match = re.search(pattern, html)
    if match is None:
        raise ValueError("no description meta tag found at " + url)
    data = match.group(2)

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3 syntax.
    print(type(data))
    b = '天气预报'
    print(type(b))
    # On Python 2, b is a byte string and data is unicode; joining them relies
    # on the default encoding being UTF-8 (set at module level in this file).
    c = b + '\n' + data
    print(c)
# Module-level call: fetch and print the forecast as soon as this file runs.
weather() |
|
|