|
import urllib.request
import socket
import re
import sys
import os
import socket
import random
import threading
# Directory that downloaded pictures are written to; destFile() creates it
# when it does not exist yet.
targetDir = r"C:\pic"
def destFile(path):
    """Build a local destination path under ``targetDir`` for ``path``.

    The file name is the text after the last ``/`` of ``path`` (the whole
    string when it contains no ``/``), prefixed with a random integer in
    1..10000 -- presumably to make clashes between equally named files
    less likely.  ``targetDir`` is created when it does not exist.

    Args:
        path: URL (or any slash-separated path) of the resource to save.

    Returns:
        The destination file path as a string.
    """
    # exist_ok=True removes the check-then-create race: this function is
    # reached from several download threads, and a separate isdir()/mkdir()
    # pair lets the slower thread fail with FileExistsError.
    os.makedirs(targetDir, exist_ok=True)
    # rpartition never raises; with no '/' it yields the whole string.
    file_name = path.rpartition('/')[2]
    prefix = '%d' % random.randint(1, 10000)
    return os.path.join(targetDir, prefix + file_name)
def getPic(link):
    """Download ``link`` to the path chosen by ``destFile(link)``, best effort.

    A failed download is reported on stderr and otherwise ignored, so a bad
    link never raises into the caller (or kills the thread running this).

    Args:
        link: URL of the picture to download.

    Returns:
        None.
    """
    try:
        urllib.request.urlretrieve(link, destFile(link))
    except Exception as exc:
        # Deliberately broad: network, HTTP and file-system errors are all
        # expected here.  Unlike a bare ``except`` this still lets
        # SystemExit / KeyboardInterrupt through.
        print("download failed: %s (%s)" % (link, exc), file=sys.stderr)
# Matches an http URL up to the first "jpg", "png" or "gif" it contains.
_IMAGE_LINK_RE = re.compile(r'(http:[^\s]*?(jpg|png|gif))')

# Number of page ids skipped without any request after a page fails to load.
_SKIP_AFTER_FAILURE = 4


def _find_image_links(page_bytes):
    """Return the distinct image links found in the raw page body, sorted.

    The body is decoded first: scanning ``str(page_bytes)`` would search the
    ``b'...'`` repr, where line breaks appear as the two characters ``\\n``
    and therefore no longer terminate a match.
    """
    text = page_bytes.decode("utf-8", errors="replace")
    return sorted({link for link, _ext in _IMAGE_LINK_RE.findall(text)})


def _fetch_page(url):
    """Return the body of ``url`` as bytes, closing the response afterwards."""
    with urllib.request.urlopen(url) as response:
        return response.read()


def _start_downloads(links):
    """Probe each link, then start one ``getPic`` thread per reachable link.

    Every link is printed before it is probed.  The first probe that fails
    abandons the remaining links (behavior kept from the original script);
    threads for the links probed so far are still started, and the index of
    each started thread is printed.
    """
    workers = []
    for link in links:
        print(link)
        try:
            # Only reachability matters here; close the response right away.
            with urllib.request.urlopen(link):
                pass
        except Exception:
            break
        workers.append(threading.Thread(target=getPic, args=(link,)))
    for index, worker in enumerate(workers):
        worker.start()
        print(index)


def _crawl(first_page, last_page):
    """Visit page ids in ``range(first_page, last_page)`` and fetch their images.

    A page that cannot be loaded has its id printed and causes the next
    ``_SKIP_AFTER_FAILURE`` ids to be skipped without a request.  For a page
    that loads, its id and a separator line are printed and downloads are
    started for the image links it contains.
    """
    # Set once, before the first request, so that the page fetches and the
    # urlretrieve calls made by getPic all share the same 2 second timeout.
    socket.setdefaulttimeout(2)
    pages_to_skip = 0
    for page_id in range(first_page, last_page):
        if pages_to_skip:
            pages_to_skip -= 1
            continue
        page_url = "http://www.xxx.com/html/tupian/xxx/%d.html" % page_id
        try:
            page_bytes = _fetch_page(page_url)
        except Exception:
            # Covers both the connection and the read; Ctrl-C still works.
            print(page_id)
            pages_to_skip = _SKIP_AFTER_FAILURE
            continue
        print(page_id)
        print("*************************************")
        _start_downloads(_find_image_links(page_bytes))


if __name__ == "__main__":
    # Range noted in the original script as an alternative: 71460,131306
    _crawl(86981, 131306)
|
|