Python网页爬虫浅析
def getImg2(html, initialFile, finalFile):
    """Extract double-quoted ``https...jpg`` strings from ``html``.

    The text of ``html`` is split on double-quote characters and every
    resulting piece is written to ``initialFile`` on its own line (each
    piece is preceded by a newline, so the file begins with an empty
    line).  That file is then read back line by line, and every line that
    starts with ``https`` and contains ``jpg`` somewhere after it is
    copied, unchanged, to ``finalFile``.

    Args:
        html: Page content.  It is converted with ``str()`` before being
            split, so a ``bytes`` object is processed via its ``repr``.
        initialFile: Path of the scratch file that receives the split
            pieces.  Overwritten if it exists.
        finalFile: Path of the output file that receives the matching
            lines.  Overwritten if it exists.

    Returns:
        None.  The results are the two files written to disk.
    """
    # Use "+" rather than "*": a pattern that can match the empty string
    # makes re.split() cut between every single character on Python 3.7+,
    # which would leave no complete URL to match later on.
    quote_run = re.compile(r'"+')
    pieces = quote_run.split(str(html))

    # The context manager guarantees the scratch file is flushed and closed
    # before it is reopened for reading below.  An explicit encoding keeps
    # non-ASCII page text from failing under a narrow locale codec.
    with open(initialFile, mode='w', encoding='utf-8') as scratch:
        scratch.writelines("\n" + piece for piece in pieces)

    # re.match anchors at the start of the line only, so this accepts any
    # line that begins with "https" and has "jpg" anywhere after it.
    jpg_url = re.compile(r"^https.*jpg")
    with open(initialFile, mode='r', encoding='utf-8') as scratch, \
            open(finalFile, mode='w', encoding='utf-8') as result:
        # Lines keep their own trailing newline, so none is added here.
        result.writelines(
            line for line in scratch if jpg_url.match(line) is not None
        )
页:
[1]