设为首页 收藏本站
查看: 2024|回复: 0

[经验分享] ·「python爬虫入门」网易云音乐下载

[复制链接]

尚未签到

发表于 2015-11-30 12:44:22 | 显示全部楼层 |阅读模式
  2015-6-2
  今天尝试看懂并修改昨天在 GitHub 上看到的一个下载网易云音乐歌单歌曲的脚本
  GitHub 地址:https://github.com/keli/netease-music


DSC0000.gif DSC0001.gif


#! /usr/bin/env python
# -*- coding: utf-8 -*-
import base64
import hashlib
import json
import md5
import os
import random
import socket
import string
import sys
import urllib2
# Install a process-wide opener whose default headers carry the Cookie and
# Referer values below, so every later urllib2.urlopen() call sends them.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.extend([
    ('Cookie', 'appver=2.0.2'),
    ('Referer', 'http://music.163.com'),
])
urllib2.install_opener(cookie_opener)
def encrypted_id(id):
    """Return the URL-safe base64 MD5 digest of ``id`` XOR-ed with a fixed key.

    Each byte of ``id`` is XOR-ed with the key byte at the same position
    (the key repeats when ``id`` is longer than the key).  The MD5 digest of
    the result is base64-encoded with '+' mapped to '-' and '/' to '_'.

    Args:
        id: Identifier to encode.  Byte strings are used as-is; anything
            else is converted with ``str()`` and must be ASCII.

    Returns:
        A 24-character string (base64 of the 16-byte digest, '=' padding
        kept).
    """
    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    raw = id if isinstance(id, bytes) else str(id).encode('ascii')
    data = bytearray(raw)
    key_len = len(key)
    # XOR in place, one byte at a time.  The subscript on the left-hand side
    # is essential: XOR-ing the whole bytearray with an int raises TypeError.
    for i, byte in enumerate(data):
        data[i] = byte ^ key[i % key_len]
    digest = hashlib.md5(bytes(data)).digest()
    # urlsafe_b64encode already substitutes '-' for '+' and '_' for '/'.
    result = base64.urlsafe_b64encode(digest)
    if not isinstance(result, str):
        # Python 3 returns bytes; hand back text in either version.
        result = result.decode('ascii')
    return result
def get_playlist(playlist_id):
    """Fetch a playlist description from the music.163.com playlist API.

    Args:
        playlist_id: Playlist id, interpolated into the request URL.

    Returns:
        The value stored under the ``'result'`` key of the decoded JSON
        response.

    Raises:
        urllib2.URLError: If the request fails.
        KeyError: If the response has no ``'result'`` key.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    return data['result']
def save_track(track, folder, position):
    """Download one playlist track into ``folder`` unless it already exists.

    The file is named ``<name>.<extension>`` from the track's ``'hMusic'``
    entry, with every '/' replaced by '_'.  The audio itself is fetched from
    ``track['mp3Url']``.

    Args:
        track: Track dict; must contain ``'mp3Url'`` and an ``'hMusic'``
            dict with ``'name'`` and ``'extension'``.
        folder: Existing directory to write the file into.
        position: Position of the track in the playlist.  Unused here; kept
            so existing callers keep working.

    Raises:
        KeyError or TypeError: If the track has no usable ``'hMusic'`` entry.
        urllib2.URLError: If the download request fails.
    """
    quality = track['hMusic']
    fname = quality['name'] + '.' + quality['extension']
    # A '/' in the track name would otherwise be taken as a path separator.
    fname = fname.replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        # Already downloaded on an earlier run.
        return
    print("Downloading %s ..." % fpath)
    resp = urllib2.urlopen(track['mp3Url'])
    try:
        data = resp.read()
    finally:
        # Close the connection even when the read fails.
        resp.close()
    with open(fpath, 'wb') as mp3:
        mp3.write(data)
def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into ``<folder>/<playlist name>``.

    Args:
        playlist_id: Id handed to ``get_playlist``.
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    info = get_playlist(playlist_id)
    target_dir = os.path.join(folder, info['name'])
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # Positions handed to save_track are 1-based.
    for number, item in enumerate(info['tracks'], 1):
        save_track(item, target_dir, number)
# Script entry point: the playlist id is the first command-line argument.
if __name__ == '__main__':
    args = sys.argv[1:]
    if not args:
        # No playlist id given: show usage and exit with status 1.
        print("Usage: %s <playlist id>" % sys.argv[0])
        sys.exit(1)
    download_playlist(args[0])
  这边是对 cookie 的处理。addheaders 其实不是方法,而是 opener 实例上的一个列表属性(所以用 help 查方法时没有查到),之前没有见过这种用法,但确实有用



# Install a process-wide opener whose default headers carry the Cookie and
# Referer values below, so every later urllib2.urlopen() call sends them.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.extend([
    ('Cookie', 'appver=2.0.2'),
    ('Referer', 'http://music.163.com'),
])
urllib2.install_opener(cookie_opener)
  这一段其实是没有用的,但是挺好奇这段代码里面函数的作用,过段时间再去学一下



def encrypted_id(id):
    """Return the URL-safe base64 MD5 digest of ``id`` XOR-ed with a fixed key.

    Each byte of ``id`` is XOR-ed with the key byte at the same position
    (the key repeats when ``id`` is longer than the key).  The MD5 digest of
    the result is base64-encoded with '+' mapped to '-' and '/' to '_'.

    Args:
        id: Identifier to encode.  Byte strings are used as-is; anything
            else is converted with ``str()`` and must be ASCII.

    Returns:
        A 24-character string (base64 of the 16-byte digest, '=' padding
        kept).
    """
    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    raw = id if isinstance(id, bytes) else str(id).encode('ascii')
    data = bytearray(raw)
    key_len = len(key)
    # XOR in place, one byte at a time.  The subscript on the left-hand side
    # is essential: XOR-ing the whole bytearray with an int raises TypeError.
    for i, byte in enumerate(data):
        data[i] = byte ^ key[i % key_len]
    digest = hashlib.md5(bytes(data)).digest()
    # urlsafe_b64encode already substitutes '-' for '+' and '_' for '/'.
    result = base64.urlsafe_b64encode(digest)
    if not isinstance(result, str):
        # Python 3 returns bytes; hand back text in either version.
        result = result.decode('ascii')
    return result
  
  下面这段代码是发挥主要作用的,但是这个脚本是原作者三个月前写的,网易云音乐应该有一些变化



def get_playlist(playlist_id):
    """Fetch a playlist description from the music.163.com playlist API.

    Args:
        playlist_id: Playlist id, interpolated into the request URL.

    Returns:
        The value stored under the ``'result'`` key of the decoded JSON
        response.

    Raises:
        urllib2.URLError: If the request fails.
        KeyError: If the response has no ``'result'`` key.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    return data['result']
def save_track(track, folder, position):
    """Download one playlist track into ``folder`` unless it already exists.

    The file is named ``<name>.<extension>`` from the track's ``'hMusic'``
    entry, with every '/' replaced by '_'.  The audio itself is fetched from
    ``track['mp3Url']``.

    Args:
        track: Track dict; must contain ``'mp3Url'`` and an ``'hMusic'``
            dict with ``'name'`` and ``'extension'``.
        folder: Existing directory to write the file into.
        position: Position of the track in the playlist.  Unused here; kept
            so existing callers keep working.

    Raises:
        KeyError or TypeError: If the track has no usable ``'hMusic'`` entry.
        urllib2.URLError: If the download request fails.
    """
    quality = track['hMusic']
    fname = quality['name'] + '.' + quality['extension']
    # A '/' in the track name would otherwise be taken as a path separator.
    fname = fname.replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        # Already downloaded on an earlier run.
        return
    print("Downloading %s ..." % fpath)
    resp = urllib2.urlopen(track['mp3Url'])
    try:
        data = resp.read()
    finally:
        # Close the connection even when the read fails.
        resp.close()
    with open(fpath, 'wb') as mp3:
        mp3.write(data)
def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into ``<folder>/<playlist name>``.

    Args:
        playlist_id: Id handed to ``get_playlist``.
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    info = get_playlist(playlist_id)
    target_dir = os.path.join(folder, info['name'])
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # Positions handed to save_track are 1-based.
    for number, item in enumerate(info['tracks'], 1):
        save_track(item, target_dir, number)
  首先是JSON,我之前并没有学过JSON,XML也只是昨天看了一小会儿
  http://music.163.com/api/playlist/detail?id=4566307 打开准备下载的歌单
  用Firebug看Json挺清晰,结构都很清楚,chrome就感觉有点一堆凑一起
  看代码也知道歌曲的链接在tracks里面
  我按着原作者的代码调试 发现一直卡在



name = track['hMusic']['name']
  调了半天,后来才去看返回的 JSON,发现 tracks 里面并不是所有歌曲都有 hMusic 这个属性
  于是直接改成 name = track['name']
  再仔细看 JSON,又发现代码还有好几处不对的地方,最后改成了这样



def get_playlist(playlist_id):
    """Fetch a playlist from the music.163.com playlist API and print its name.

    Args:
        playlist_id: Playlist id, interpolated into the request URL.

    Returns:
        The value stored under the ``'result'`` key of the decoded JSON
        response.

    Raises:
        urllib2.URLError: If the request fails.
        KeyError: If the response has no ``'result'`` key, or the result has
            no ``'name'``.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    result = data['result']
    print(result['name'])
    return result
def save_track(track, folder, position):
    """Download one playlist track into ``folder`` unless it already exists.

    The file is named ``<name><position>.mp3`` with every '/' replaced by
    '_'.  A track whose download fails or times out (5 seconds) is reported
    and skipped, so the caller can carry on with the next one.

    Args:
        track: Track dict; must contain ``'name'`` and ``'mp3Url'``.
        folder: Existing directory to write the file into.
        position: Position of the track in the playlist; appended to the
            file name.
    """
    name = track['name']  # original: name = track['hMusic']['name']
    print(name)
    # original: fname = name + '.' + track['hMusic']['extension']
    fname = (name + str(position) + '.mp3').replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        # Already downloaded on an earlier run.
        return
    print("Downloading %s ..." % fpath)
    # original (the URL built here was never used for the request):
    # dfsId = str(track['hMusic']['dfsId'])
    # url = 'http://m%d.music.126.net/%s/%s.%s' % (random.randrange(1, 3), encrypted_id(dfsId), dfsId, track['hMusic']['extension'])
    try:
        resp = urllib2.urlopen(track['mp3Url'], timeout=5)
        try:
            data = resp.read()
        finally:
            # Close the connection even when the read stalls or fails.
            resp.close()
    except (urllib2.URLError, socket.timeout) as e:
        # Both are needed: in the author's runs the timeout surfaced as
        # socket.timeout and was not caught by the URLError handler.
        print(type(e))
        return
    with open(fpath, 'wb') as mp3:
        mp3.write(data)

def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into ``<folder>/<playlist name>``.

    Prints ``begin save`` before each track is handed to ``save_track``.

    Args:
        playlist_id: Id handed to ``get_playlist``.
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    info = get_playlist(playlist_id)
    target_dir = os.path.join(folder, info['name'])
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # Positions handed to save_track are 1-based.
    for number, item in enumerate(info['tracks'], 1):
        print('begin save')
        save_track(item, target_dir, number)
  注释掉的部分就是更改的地方
  在下载歌曲的时候,经常会卡在某一首上。后来知道 urlopen 可以设置 timeout(超时时间),
  我的想法是:超时的话就报错,然后继续下载下一首。
  但是我对 try/except 不是很熟悉,试了好久,
  试过下面这几个版本



try:
resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
data = resp.read()
resp.close()
except urllib2.URLError as e:
print type(e)    #not catch
pass
except socket.timeout as e:
print type(e)    #catched
pass  
with open(fpath, 'wb') as mp3:
mp3.write(data)


try:
resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
data = resp.read()
resp.close()
except urllib2.URLError as e:
print type(e)    #not catch
except socket.timeout as e:
print type(e)    #catched
else:
with open(fpath, 'wb') as mp3:
mp3.write(data)


try:
resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
except urllib2.URLError as e:
print type(e)    #not catch
pass
except socket.timeout as e:
print type(e)    #catched
pass  
data = resp.read()
resp.close()
with open(fpath, 'wb') as mp3:
mp3.write(data)
  总之都是代码报错,报错了也没有继续运行下去
  后来改成这样



    try:
resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
data = resp.read()
resp.close()
except urllib2.URLError as e:
print type(e)    #not catch
pass
except socket.timeout as e:
print type(e)    #catched
pass  
else:
with open(fpath, 'wb') as mp3:
mp3.write(data)
  就行了=。=, 还是得再好好看看异常处理那一块
  
  总之这就是一下午的学习,虽然下歌什么的对我并没有什么卵用
  

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-145372-1-1.html 上篇帖子: 2015/9/10 Python基础(11):错误和异常 下篇帖子: 《Python基础教程》 读书笔记 第五章(下)循环语句
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表