Python 简单业务爬虫

昊漫玉 发表于 2018-8-7 09:38:39

#!/usr/bin/python
# -*- coding: UTF-8 -*-
　　
import urllib
　　
import re
　　

　　
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; any scheme the standard-library ``urlopen``
            accepts.

    Returns:
        The body exactly as read from the response (``str`` on Python 2,
        ``bytes`` on Python 3). No decoding is applied.

    Raises:
        IOError: If the URL cannot be opened or read (on Python 3 this
            surfaces as ``urllib.error.URLError``, an ``OSError`` subclass).
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3 location
    except ImportError:
        from urllib import urlopen  # Python 2 location

    # closing() releases the connection even if read() raises; the Python 2
    # response object is not a context manager on its own.
    with closing(urlopen(url)) as page:
        return page.read()
　　

　　
def getImg(html):
    """Download every ``.jpg`` image referenced in *html*.

    An image reference is any ``src="....jpg"`` attribute that is
    immediately followed by a ``pic_ext`` attribute. Each match is saved in
    the current working directory as ``0.jpg``, ``1.jpg``, ... in the order
    the matches appear in the page.

    Args:
        html: Page source, as text or as the raw bytes read from a response.

    Returns:
        The list of file names written, in download order (empty when the
        page contains no matching image).

    Raises:
        IOError: If one of the image URLs cannot be retrieved.
    """
    try:
        from urllib.request import urlretrieve  # Python 3 location
    except ImportError:
        from urllib import urlretrieve  # Python 2 location

    # Only true on Python 3 (on Python 2 ``bytes is str``): a text pattern
    # cannot be matched against bytes there, so decode first. The pattern
    # itself is pure ASCII, so undecodable bytes elsewhere are harmless.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    imglist = re.findall(r'src="(.+?\.jpg)" pic_ext', html)

    saved = []
    for x, imgurl in enumerate(imglist):
        filename = '%s.jpg' % x
        urlretrieve(imgurl, filename)
        saved.append(filename)
    return saved
　　

　　
# Script entry: fetch the thread page at the URL below, then download the
# images it references and print whatever getImg() returns.
html = getHtml("http://tieba.baidu.com/p/2460150866")

# The parenthesised form prints the same on Python 2 and is valid on Python 3.
print(getImg(html))

页: [1]

运维网's Archiver

Python 简单业务爬虫