昊漫玉 发表于 2018-8-7 09:38:39

Python 简单业务爬虫

#!/usr/bin/python  
# -*- coding: UTF-8 -*-
  
import urllib
  
import re
  

  
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` yields for the opened URL: a byte string
        (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        IOError: If the URL cannot be opened (``urllib.error.URLError`` on
            Python 3 is a subclass of ``OSError``/``IOError``).
    """
    # Python 2 exposes urlopen on urllib itself; Python 3 moved it to
    # urllib.request.  Resolve it locally so the function runs on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        page.close()
  

  
def getImg(html):
    """Download every ``.jpg`` image referenced in *html*.

    An image is picked up when its ``src="....jpg"`` attribute is directly
    followed by `` pic_ext``.  Each match is saved into the current working
    directory as ``0.jpg``, ``1.jpg``, ... in the order the matches appear.

    Args:
        html: Page source, either text or (on Python 3) ``bytes``.

    Returns:
        List of the file names written, in download order; an empty list
        when no image matches.
    """
    # Python 2 keeps urlretrieve on urllib itself; Python 3 moved it to
    # urllib.request.  Resolve it locally so the function runs on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # On Python 3 a fetched page is bytes while the pattern below is text;
    # decode so the two types match.  URLs are expected to be ASCII, so
    # replacing undecodable bytes should not affect the matches.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    imglist = re.findall(r'src="(.+?\.jpg)" pic_ext', html)

    saved = []
    for index, imgurl in enumerate(imglist):
        filename = '%s.jpg' % index
        urlretrieve(imgurl, filename)
        saved.append(filename)
    return saved
  

  
# Fetch the forum thread and download the images it references.
html = getHtml("http://tieba.baidu.com/p/2460150866")

# Parenthesised form prints the same on Python 2 and is valid on Python 3.
print(getImg(html))
页: [1]
查看完整版本: Python 简单业务爬虫