Python网页爬虫浅析

zhaolu 发表于 2018-8-13 11:05:27

　　
def getImg2(html, initialFile, finalFile):
    """Extract HTTPS ``jpg`` links from ``html`` and write them to ``finalFile``.

    The text form of ``html`` is split on double-quote characters and every
    piece is written on its own line to ``initialFile`` (an intermediate
    dump, preceded by an empty first line). That file is then read back line
    by line, and each line that starts with ``https`` and has ``jpg`` later on
    the same line is copied unchanged to ``finalFile``.

    Args:
        html: Page content; it is converted with ``str()`` before splitting.
        initialFile: Path of the intermediate file (overwritten).
        finalFile: Path of the output file (overwritten).

    Returns:
        None. The results are the contents of the two files.
    """
    # Use "+" rather than "*": a pattern that can match the empty string makes
    # re.split (Python 3.7+) cut between every single character, so no URL
    # would ever survive as one piece.
    quote_run = re.compile('"+')
    pieces = quote_run.split(str(html))

    # The context manager guarantees the dump is flushed and closed before it
    # is reopened for reading below.
    with open(initialFile, mode='w') as dump:
        for piece in pieces:
            dump.write("\n")
            dump.write(piece)

    # match() anchors at the start of the line; the pattern only requires
    # "jpg" to occur somewhere after the leading "https".
    jpg_link = re.compile("^https.*jpg")

    with open(initialFile, mode='r') as dump, open(finalFile, mode='w') as out:
        for line in dump:
            if jpg_link.match(line) is not None:
                # Lines read back keep their own trailing newline, so none is
                # added here.
                out.write(line)

页: [1]

运维网's Archiver

Python网页爬虫浅析