python写的批量下载baidu mp3的程序至少到09-9-18仍然可用

hugang · 发表于 2015-4-22 02:07:29

　　本程序在 Windows 平台下用 Python 2.6 编写，移植到 Linux 应该也很容易。
默认使用 10 个线程下载。

　　其中多线程下载部分参考了 http://hi.baidu.com/zjw0358/blog 。

mydown.py
#!/usr/bin/env python
# coding=utf-8
import httplib,urllib,urllib2
import re,os
from downmp3 import GetSize,DownMp3
def BaiduUrlDecode(enurl):
    """Decode an obfuscated Baidu MP3 URL.

    Every character of ``enurl`` that belongs to the 62-character alphabet
    ``0-9A-Za-z`` is rotated by a fixed offset within that alphabet; all
    other characters are copied unchanged.  The offset is chosen so that
    the second character of the result is ``'t'`` (presumably because the
    decoded text is expected to start with ``http`` -- confirm against the
    pages being scraped).  The rotated text is then GBK-encoded and
    percent-decoded.

    Args:
        enurl: The encoded URL as a unicode string.

    Returns:
        The decoded URL as a byte string.  If ``enurl`` has fewer than two
        characters a diagnostic line is printed and an empty byte string
        is returned.

    Raises:
        ValueError: If the second character of ``enurl`` is not in the
            alphabet, so no rotation offset can be derived.
    """
    # Byte-oriented percent-decoding under both interpreters: Python 2's
    # unquote works on byte strings; Python 3 needs unquote_to_bytes.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote_to_bytes as unquote

    k = u'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    decoded = u''
    try:
        key = k.index(u't') - k.index(enurl[1])
    except IndexError:
        # Input too short to derive the offset; report it and fall through
        # with an empty result rather than aborting the whole batch.
        print('enurl IndexError: %s $' % (enurl,))
    else:
        # Rotate one character at a time (the pasted original compared the
        # whole string against the alphabet, so nothing was ever decoded).
        decoded = u''.join(
            k[(k.index(char) + key) % len(k)] if char in k else char
            for char in enurl
        )
    return unquote(decoded.encode('gbk'))
def BadiuUrlProcess(baidu_url):
    """Percent-encode the non-ASCII parts of a URL using GBK.

    Each run of characters that is neither an ASCII word character nor one
    of the URL punctuation characters listed in the pattern is GBK-encoded
    and percent-quoted; the rest of the URL is left untouched.

    Args:
        baidu_url: The URL as a unicode string.

    Returns:
        The URL with every matching run replaced by its own percent-quoted
        GBK form.  A URL without such runs is returned unchanged.

    Raises:
        UnicodeEncodeError: If a matched run cannot be encoded as GBK.
    """
    import re
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # Matches runs of "other" characters (e.g. Chinese text).  The word
    # class is spelled out as a-zA-Z0-9_ so it stays ASCII-only on
    # Python 3, where a bare \w would also match CJK characters.
    match_CHchar = r'([^:._,~`!@#\|{}\^\*;%/\"\'\[\]\+\=\?\-\$\&\\a-zA-Z0-9_]+)'
    ch_pattern = re.compile(match_CHchar)

    def _quote_run(match):
        # Quote each run individually; substituting the first run's escape
        # for every match would corrupt URLs with several distinct runs.
        return quote(match.group(1).encode('gbk'))

    return ch_pattern.sub(_quote_run, baidu_url)

if __name__ == "__main__":

print """ 支持百度MP3的大部分列表音乐的下载，默认采用10线程下载

      [1] 新歌top100
      [2] 歌曲top500
      [3] 歌手top200 (暂不支持下载)
      [4] 中文金曲榜
      [5] 经典老歌
      [6] 热舞dj
      [7] 流金岁月
      [8] 电视金曲
      [9] 歌曲列表
      [0] 退出
                              --by auxten auxtenwpc[at]gmail[dot]com
""".decode('utf-8').encode('gbk')
id = int(raw_input('输入你想下载的list的编号: '.decode('utf-8').encode('gbk')))
if id == 1: topid = '/list/newhits.html?id=1?top1'
elif id == 2: topid = '/topso/mp3topsong.html?id=1?top2'
elif id == 3: topid = '/list/tvs.html?id=1?top5';exit(1)
elif id == 4: topid = '/list/bangping.html?id=1'#;exit(1)
elif id == 5: topid = '/list/oldsong.html?top6'
elif id == 6: topid = '/list/dj.html'
elif id == 7: topid = '/list/liujinsuiyue.html'
elif id == 8: topid = '/list/tvs.html?id=1?top5'
elif id == 9: topid = '/list/tvs.html?id=1?top5'#;exit(1)
elif id == 0: exit(1)
# topid = '/list/oldsong.html?top6'
# topid = '/list/tvs.html?id=1?top5'
print "Processing please wait .:)"
errorlist = []
conn = httplib.HTTPConnection('list.mp3.baidu.com')
conn.request("GET",topid)
response = conn.getresponse()
html = response.read().decode('gb18030')
# print html.encode('gbk')
conn.close()
match_type1 = r'">(\d{,3})\.' #编号
match_type2 = r'">(\d{,3})\.[\s\S]*?(.*?)[^)].*' #no url songname for 流金岁月
match_type3 = r'">(\d{,3})\.[\s\S]*?(.*?)[^)].*">(.*)\)' #name+author
match_type4 = r'[\s]*?(\d{,3})[\s\S]*?(.*?)[\s\S]*?target="_blank">(.*?)' #no url songname singer 中文金曲榜
list_number = re.findall(match_type1, html)
list_all = re.findall(match_type3, html)
if list_all == []:
      # print 'list_all empty1!'
      list_all = re.findall(match_type2, html)
if list_all == []:
      # print 'list_all empty2!'
      list_all = re.findall(match_type4, html)
# print 'list_all',list_all
# print 'list_all__len__',list_all.__len__()
conn = httplib.HTTPConnection('mp3.baidu.com')
songnumlst = range(0,list_all.__len__())

for num in songnumlst:
      try:
         try: authorname = '-'+list_all[num][3]
         except IndexError:
            authorname = ''
         print list_all[num][0].encode('gbk'),list_all[num][2].encode('gbk'),authorname.encode('gbk')
#       print num
         conn.request("GET",BadiuUrlProcess(list_all[num][1]))
         # print "URL!",BadiuUrlProcess(list_all[num][1]).encode('gbk')
         response = conn.getresponse()
         html = response.read().decode('gb18030')
         conn.close()
         # print html.encode('gbk')
         html = re.search(r'

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] python写的批量下载baidu mp3的程序至少到09-9-18仍然可用

浏览过的版块

扫码加入运维网微信交流群

[经验分享] python写的批量下载baidu mp3的程序 至少到09-9-18仍然可用

浏览过的版块

[经验分享] python写的批量下载baidu mp3的程序至少到09-9-18仍然可用