|
之前见过别人写的抓取图片的 Python 脚本，自己以前也用正则表达式写过类似的；最近接触到 BeautifulSoup，所以拿来练练手。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
| # -*- coding:utf8 -*-
from bs4 import BeautifulSoup
import os, sys, urllib2,time,random
# Downloaded images are stored in a ``sexy`` folder under the directory the
# script is launched from; create that folder on the first run.
path = os.getcwd()
new_path = os.path.join(path, u'sexy')
if not os.path.isdir(new_path):
    os.mkdir(new_path)
def _fetch(url):
    """Return the raw response body of ``url``.

    The connection is closed even when reading fails, so a long crawl does
    not accumulate open sockets.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def page_loop(page=1):
    """Download every image reachable from one listing page of the tag feed.

    The listing page is fetched, each ``<a class="img">`` link on it is
    followed, and every ``<img>`` tag without a ``class`` attribute found on
    the linked page is written to a new file inside ``new_path``.

    :param page: page number substituted into the ``page`` query parameter.
    """
    url = 'http://sexy.faceks.com/tag/美女摄影?page=%s' % page
    print(url)
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed, so results could differ between machines.
    soup = BeautifulSoup(_fetch(url), 'html.parser')
    # First collect the entry link of every gallery on the listing page.
    for girl in soup.findAll('a', attrs={'class': 'img'}):
        girlink = girl.get('href')
        if not girlink:
            # Anchor without a target: nothing to follow.
            continue
        print(girlink)
        per_soup = BeautifulSoup(_fetch(girlink), 'html.parser')
        # Then walk all image tags of this single gallery.
        for img_url in per_soup.findAll('img', attrs={'class': None}):
            girlurl = img_url.get('src')
            if not girlurl:
                # <img> without a src attribute: nothing to download.
                continue
            print(girlurl)
            content2 = _fetch(girlurl)
            # File name is the current time (HHMMSS) plus a random 4-digit
            # suffix so images saved within the same second get distinct names.
            file_name = time.strftime('%H%M%S') + str(random.randint(1000, 9999))
            with open(os.path.join(new_path, file_name), 'wb') as code:
                code.write(content2)
# Script entry point: crawl the first listing page (``page`` defaults to 1).
page_loop()
|
效果图如下:
|
|