def ShowCity():
    """Print the raw link fragment of every city on the province index page.

    Downloads the province page (id=420000) from tianqihoubao.com and prints,
    one per line, the text captured between ``<a href="`` and ``">`` inside
    each centered table cell.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (including when the timeout expires).
    """
    # Without an explicit timeout requests waits indefinitely, so one stalled
    # connection would hang the whole script.
    html = requests.get(
        "http://www.tianqihoubao.com/weather/province.aspx?id=420000",
        timeout=10)
    citys = re.findall('<td align="center"><a href="(.*?)">', html.text, re.S)
    for city in citys:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(city)
抓取的结果如下所示:
def ShowWeather(city):
    """Fetch one city's weather page and print the non-empty table cells.

    Args:
        city: one match from the city-link regex, i.e. the relative page path
            and the link title joined by the literal text ``" title="``.

    Prints the title followed by '(白天-->夜间)', then every non-blank cell of
    each table row (skipping the first two rows and the first cell of each
    row), with a dashed separator line after each row. If the page does not
    contain the expected weather table, a note is written to stderr and the
    function returns without printing any rows.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (including when the timeout expires).
    """
    import sys  # local import: only needed for the error path below

    # partition() never raises when the separator is missing, unlike
    # indexing the result of split().
    href, _, title = str(city).partition('" title="')
    # Fall back to the path so a link without a title still gets a heading.
    print('{0} {1}'.format(title or href, '(白天-->夜间)'))

    # Without an explicit timeout requests waits indefinitely.
    html = requests.get(
        "http://www.tianqihoubao.com/weather/{0}".format(href), timeout=10)
    table = re.search(
        '<table width="100%" border="0" class="b" cellpadding="1" cellspacing="1">(.*?)</table>',
        html.text, re.S)
    if table is None:
        # Error page or changed layout: report it instead of crashing with
        # AttributeError on None.group().
        sys.stderr.write('no weather table found for {0}\n'.format(href))
        return

    rows = re.findall('<tr>(.*?)</tr>', table.group(1), re.S)
    # The first two rows are skipped -- presumably headers; confirm against
    # the live page.
    for row in rows[2:]:
        cells = re.findall('>(.*?)<', row, re.S)
        # The first captured fragment of each row is skipped as well.
        for cell in cells[1:]:
            if cell.strip():
                print(cell)
        print('--' * 40)
这样一来,我们就可以获取到对应城市的天气情况了!
完整代码:
#coding:UTF-8
"""Print the weather tables of every city linked from one province page.

The province index page on tianqihoubao.com is downloaded, each city link is
extracted with a regular expression, and the weather table of every city page
is printed to standard output.
"""
import re
import sys

import requests

# Python 2 only: make UTF-8 the implicit str/unicode conversion codec so that
# str() on the scraped unicode text does not raise UnicodeEncodeError.
# Python 3 has no builtin reload() and already defaults to UTF-8.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('UTF-8')

# Seconds to wait for the server; requests waits indefinitely when no timeout
# is given, so one stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

PROVINCE_URL = "http://www.tianqihoubao.com/weather/province.aspx?id=420000"
CITY_URL = "http://www.tianqihoubao.com/weather/{0}"

# Compiled once here instead of being re-parsed for every city and row.
CITY_LINK_RE = re.compile(r'<td align="center"><a href="(.*?)">', re.S)
TABLE_RE = re.compile(
    r'<table width="100%" border="0" class="b" cellpadding="1" cellspacing="1">(.*?)</table>',
    re.S)
ROW_RE = re.compile(r'<tr>(.*?)</tr>', re.S)
CELL_RE = re.compile(r'>(.*?)<', re.S)


def ShowWeather(city):
    """Fetch one city's weather page and print the non-empty table cells.

    Args:
        city: one match of CITY_LINK_RE, i.e. the relative page path and the
            link title joined by the literal text ``" title="``.

    Prints the title followed by '(白天-->夜间)', then every non-blank cell of
    each table row (skipping the first two rows and the first cell of each
    row) with a dashed separator after each row, and finally a blank line and
    a line of asterisks. If the page lacks the expected table, a note is
    written to stderr and nothing else is printed for that city.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (including when the timeout expires).
    """
    # partition() never raises when the separator is missing, unlike
    # indexing the result of split().
    href, _, title = str(city).partition('" title="')
    # Fall back to the path so a link without a title still gets a heading.
    print('{0} {1}'.format(title or href, '(白天-->夜间)'))

    html = requests.get(CITY_URL.format(href), timeout=REQUEST_TIMEOUT)
    table = TABLE_RE.search(html.text)
    if table is None:
        # Error page or changed layout: skip this city rather than abort the
        # whole run with AttributeError on None.group().
        sys.stderr.write('no weather table found for {0}\n'.format(href))
        return

    # The first two rows are skipped -- presumably headers; confirm against
    # the live page.
    for row in ROW_RE.findall(table.group(1))[2:]:
        # The first captured fragment of each row is skipped as well.
        for cell in CELL_RE.findall(row)[1:]:
            if cell.strip():
                print(cell)
        print('--' * 40)
    # One string keeps the output identical on Python 2 and 3: a blank line,
    # then the row of asterisks.
    print('\n' + '*' * 40)


def ShowCity():
    """Download the province page and print the weather of every linked city.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched
            (including when the timeout expires).
    """
    html = requests.get(PROVINCE_URL, timeout=REQUEST_TIMEOUT)
    for city in CITY_LINK_RE.findall(html.text):
        ShowWeather(city)


def main():
    """Script entry point."""
    ShowCity()


if __name__ == '__main__':
    main()
是的,你没有看错,短短34行代码就可以爬取湖北省所有的主要城市1个月的所有天气情况,是不是很厉害呀?不过不要高兴得太早,凡事有利有弊,看看它的运行结果吧:[Finished in 371.8s] 3.知识总结: 3.1.编码问题: