def geturl():
    """Return the video links found on the budejie.com video listing page.

    Downloads ``http://www.budejie.com/video/`` and collects the value of
    every ``data-mp4="..."`` attribute found in the returned HTML.

    Returns:
        list of str: the captured attribute values, in the order they
        appear in the page; empty when no attribute matches.

    Raises:
        IOError: propagated from ``urllib.urlopen`` if the request fails.
    """
    response = urllib.urlopen("http://www.budejie.com/video/")
    try:
        html = response.read()
    finally:
        # Python 2 urlopen() results are not context managers, so release
        # the connection explicitly even when read() raises.
        response.close()
    reg = r'data-mp4="(.*?)"'
    return re.findall(reg, html)
import os

# Download every video linked from the listing page into ./video/.
video_dir = './video'
if not os.path.isdir(video_dir):
    # open() in 'wb' mode does not create missing parent directories.
    os.makedirs(video_dir)

# NOTE(review): `page` is never passed to geturl(), so each of the 99
# iterations fetches the same listing URL and re-downloads whatever it
# returns. Presumably pagination was intended -- confirm the site's paging
# URL scheme before wiring `page` into geturl().
for page in range(1, 100):
    for link in geturl():
        print(link)  # link is the URL of the video
        filename = link.split('/')[-1]
        if not filename:
            # Empty match or URL ending in '/': nothing to name the file after.
            continue
        response = urllib.urlopen(link)
        try:
            payload = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        # `with` guarantees the file is flushed and closed even if write() raises.
        with open(os.path.join(video_dir, filename), 'wb') as out:
            out.write(payload)
# Code for scraping pictures
# -*- coding:utf-8 -*-
import urllib,re
def geturl():
    """Return the picture links found on the budejie.com picture listing page.

    Downloads ``http://www.budejie.com/pic/`` and collects the value of
    every ``data-original="..."`` attribute found in the returned HTML.

    Returns:
        list of str: the captured attribute values, in the order they
        appear in the page; empty when no attribute matches.

    Raises:
        IOError: propagated from ``urllib.urlopen`` if the request fails.
    """
    response = urllib.urlopen("http://www.budejie.com/pic/")
    try:
        html = response.read()
    finally:
        # Python 2 urlopen() results are not context managers, so release
        # the connection explicitly even when read() raises.
        response.close()
    reg = r'data-original="(.*?)"'
    return re.findall(reg, html)
import os

# Download every picture linked from the listing page into ./picture/.
picture_dir = './picture'
if not os.path.isdir(picture_dir):
    # open() in 'wb' mode does not create missing parent directories.
    os.makedirs(picture_dir)

# NOTE(review): `page` is never passed to geturl(), so each of the 99
# iterations fetches the same listing URL and re-downloads whatever it
# returns. Presumably pagination was intended -- confirm the site's paging
# URL scheme before wiring `page` into geturl().
for page in range(1, 100):
    for link in geturl():
        print(link)  # link is the URL of the picture
        filename = link.split('/')[-1]
        if not filename:
            # Empty match or URL ending in '/': nothing to name the file after.
            continue
        response = urllib.urlopen(link)
        try:
            payload = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        # The original never closed the output file; `with` closes it on
        # every path, including when write() raises.
        with open(os.path.join(picture_dir, filename), 'wb') as out:
            out.write(payload)