Python札记1-HTTP Download

wdx1992828 · 发表于 2017-4-30 14:10:47

# -*- coding: utf-8 -*-
__author__ = 'gull'
import os, urllib2, log_factory
from urlparse import urlsplit
def get(url, filePath, fileName = None, buffer = 16 * 1024):
    """Download ``url`` over HTTP and save the response body under ``filePath``.

    Args:
        url: Address to request.
        filePath: Directory the downloaded file is written into.
        fileName: Name to save the file as. When empty, the name is taken
            from the response's Content-Disposition header, and failing
            that from the last path segment of the final URL.
        buffer: Number of bytes read from the response per chunk.

    Returns:
        A tuple ``(fileName, totalLength, fullfileName)`` where
        ``totalLength`` is the raw Content-Length header value (``None``
        when the server did not send one) and ``fullfileName`` is
        ``filePath`` joined to ``fileName`` with ``os.path.sep``.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy fsrc to fdst in ``buffer``-sized chunks, logging progress."""
        try:
            total = float(totalLength) if totalLength else None
        except ValueError:
            # Malformed Content-Length: report progress as "unknown total".
            total = None
        if total is not None and total <= 0:
            # A zero/negative length cannot be used as a percentage base.
            total = None
        bytesRead = 0.0
        while True:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)
            bytesRead += len(buf)
            if total is not None:
                log.info("%s: %.02f/%.02f kb (%d%%)" % (
                    fileName,
                    bytesRead / 1024.0,
                    total / 1024.0,
                    100 * bytesRead / total
                ))
            else:
                log.info("%s: %.02f/? kb (?%%)" % (
                    fileName,
                    bytesRead / 1024.0
                ))

    def getFileName(openUrl):
        """Pick a file name from Content-Disposition, else from the URL path."""
        # Read the header *value*; the headers object itself has no split().
        disposition = openUrl.info().get('Content-Disposition')
        if disposition:
            params = {}
            for part in disposition.split(';'):
                # partition() splits on the first '=' only, so a file name
                # that itself contains '=' stays intact.
                key, _, value = part.strip().partition('=')
                params[key.strip().lower()] = value.strip()
            filename = params.get('filename', '').strip("\"'")
            # The header is server-controlled: keep only the final path
            # component so it cannot point outside filePath.
            filename = os.path.basename(filename.replace('\\', '/'))
            if filename:
                return filename
        # No usable name in the headers: fall back to the final URL's path.
        return os.path.basename(urlsplit(openUrl.url)[2])

    def getFileLength(openUrl):
        """Return the raw Content-Length header value, or None if absent."""
        return openUrl.info().get("Content-Length")

    r = urllib2.urlopen(urllib2.Request(url), timeout = 120)  # timeout is 120s
    try:
        fileName = fileName or getFileName(r)
        fullfileName = "%s%s%s" % (filePath, os.path.sep, fileName)
        totalLength = getFileLength(r)
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
        return fileName, totalLength, fullfileName
    finally:
        # Always release the connection, whether the download succeeded or not.
        r.close()
        log.info("http request finished.")
　　参数说明：
　　

url:即下载路径，如http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz
filePath:下载文件保存的文件夹
fileName:下载后保存的文件名，可选参数。若为空，则会取response header中的filename信息（如下图）；若仍为空，则取url路径的最后一段（如:apache-tomcat-7.0.20.tar.gz)

buffer:下载缓冲区大小，默认16k

　　可继续加入以下特性:
　　

支持代理
文件分块，多线程下载
异步下载，回调机制
...

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] Python札记1-HTTP Download

浏览过的版块

扫码加入运维网微信交流群