python多线程下载图片

fox111 发表于 2018-8-11 13:30:39

功能：从 image.baidu.com 自动翻页下载图片的 Python 程序。　　用法：运行程序后，输入关键字即可。
　　#!/usr/bin/python
　　# filename: getbaidupic.py
　　# description: get images from image.baidu.com
　　# author: cjcse
　　# version: v 0.21
　　import urllib
　　import htmllib
　　import formatter
　　import string
　　import os
　　import sys
　　import time
　　import thread
　　#import threading
　　class Parser(htmllib.HTMLParser):
　　#return a dictionary mapping anchor texts to lists of associated hyperlinks
　　def __init__(self, verbose=0):
　　self.anchors = {}
　　f = formatter.NullFormatter()
　　htmllib.HTMLParser.__init__(self, f, verbose)
　　def anchor_bgn(self, href, name, type):
　　self.save_bgn()
　　self.anchor = href
　　def anchor_end(self):
　　text = string.strip(self.save_end())
　　if self.anchor and text:
　　self.anchors = self.anchors.get(text, []) +
# Download statistics shared by every worker thread.  They are initialised
# here because nothing else in the file defines them: without this the first
# ``total += 1`` raised NameError, which the old bare ``except`` silently
# turned into a "failed" report for every single URL.
total = 0
successed = 0
failed = 0
_stats_lock = thread.allocate_lock()


def _count(counter):
    """Add one to the module-level counter named *counter* under the lock."""
    with _stats_lock:
        globals()[counter] += 1


def _unique_path(directory, filename):
    """Return a path for *filename* inside *directory* that does not exist yet.

    A clash is resolved by inserting ``_1``, ``_2``, ... in front of the
    extension, so the extension survives even when the name holds several dots.
    """
    path = os.path.join(directory, filename)
    base, ext = os.path.splitext(path)
    suffix = 1
    while os.path.exists(path):
        path = "%s_%d%s" % (base, suffix, ext)
        suffix += 1
    return path


def GetJpg(url, min_size=11024):
    """Download the image at *url* into the directory named by global ``save``.

    The file is named after the last path segment of *url*; an existing file
    is never overwritten (see ``_unique_path``).  One status line is printed
    per call.  ``total`` is incremented for every call, and ``successed`` or
    ``failed`` for the corresponding outcome; a response that is skipped for
    being too small is counted in ``total`` only.

    Parameters:
        url: address of the image to fetch.
        min_size: responses shorter than this many bytes are skipped.
            NOTE(review): 11024 is the value from the original code; it may
            be a typo for 1024 -- confirm before changing.

    Returns:
        None.  Errors are reported on stdout and never propagate, so that one
        bad link cannot stop the rest of the page from being processed.
    """
    _count("total")
    filename = url.split("/")[-1]
    try:
        if not filename:
            raise ValueError("URL has no file name component")
        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
        if len(data) < min_size:
            print(url + "\t[Skipped: too small]")
            return
        with open(_unique_path(save, filename), "wb") as out:
            out.write(data)
    except Exception as err:
        # Best-effort worker boundary: report and carry on.  ``Exception``
        # (not a bare except) lets SystemExit/KeyboardInterrupt through.
        _count("failed")
        print(url + "\t[Failed: %s]" % err)
    else:
        _count("successed")
        # NOTE(review): the original status suffixes were lost when the code
        # was pasted (every print was ``url + "\t"``); these are replacements.
        print(url + "\t[OK]")
def GetBaiduNextPage(url):
    """Return *url* extended with the paging parameters for the next page.

    The ``rn``, ``pn`` and ``ln`` query fields are filled in from the
    module-level variables of the same names.  As a side effect the global
    ``pn`` is advanced by 18 so that the following call addresses the page
    after this one.
    """
    global pn
    paging = "".join(["&rn=", repr(rn), "&pn=", repr(pn), "&ln=", repr(ln)])
    pn = pn + 18
    return url + paging
def GetAllJpg(url):
    """Fetch the page at *url* and download every JPEG address in its links.

    The page is parsed with ``Parser``.  Each href that mentions ``.jpg`` is
    broken up at ``&`` and ``=``, and every resulting piece that contains
    both ``http://`` and ``.jpg`` (compared case-insensitively) is handed to
    ``GetJpg``.

    Parameters:
        url: address of the page to scan.

    Returns:
        None.  A failure to fetch the page itself propagates to the caller;
        failures of individual downloads are skipped.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()

    parser = Parser()
    parser.feed(html)
    parser.close()

    for hrefs in parser.anchors.values():
        for href in hrefs:
            if ".jpg" not in href:
                continue
            for field in href.split("&"):
                for piece in field.split("="):
                    # Lower-case only for the comparison.  The download must
                    # use the address exactly as written, because URL paths
                    # are case-sensitive on many servers.
                    lowered = piece.lower()
                    if "http://" in lowered and ".jpg" in lowered:
                        try:
                            GetJpg(piece)
                        except Exception:
                            # One bad image must not abort the whole page.
                            continue
# Script entry: ask for a keyword, then scan ``pages`` result pages of
# image.baidu.com concurrently, one worker thread per page.

# Imported here because the file-level ``import threading`` is commented out.
import threading

print("---------------------------------------------------------------------")
print("Description: Get images from image.baidu.com. ")
print("Author: cjcse from CU.")
print("version: v 0.2.")
print("---------------------------------------------------------------------")

# Keep asking until a non-empty keyword is supplied.
keyword = raw_input("Input your keywords: ")
while not keyword:
    keyword = raw_input("Keyword: ")

# Quote the keyword so that spaces, "&", "=" and non-ASCII bytes cannot
# corrupt the query string.
base_url = ("http://image.baidu.com/i?ct=201326592&cl=2&lm=-1&tn=baiduimage&pv=&word="
            + urllib.quote(keyword) + "&z=5")

# Directory the downloads are written to (read by GetJpg as a global).
save = "c:\\image_baidu"
try:
    if not os.path.exists(save):
        os.mkdir(save)
except OSError:
    print("Failed to create directory in disk c:")
    sys.exit(1)

pages = 50
print("The images will be stored in folder \"%s\"." % save)

# Paging parameters read by GetBaiduNextPage as globals.
rn = 21
pn = 18
ln = 2000

workers = []
page_url = base_url
for _ in range(pages):
    worker = threading.Thread(target=GetAllJpg, args=(page_url,))
    worker.start()
    workers.append(worker)
    # Always derive the next page from the pristine base URL.  Feeding the
    # previous result back in appended another rn/pn/ln triple on every page.
    page_url = GetBaiduNextPage(base_url)

# Wait for the workers instead of spinning in ``while True: pass``, which
# pinned a CPU core and meant the program never terminated.
for worker in workers:
    worker.join()

页: [1]

运维网's Archiver

python多线程下载图片