|
之前用 Python 抓取豆瓣的音乐,并保存到 xls 文件中;最近在玩 MongoDB,于是有了如下代码,主要涉及用 Node.js 连接并操作 MongoDB,以及用 http 模块抓取网页内容。
没什么深奥的原理,不多说,代码如下:
// MongoDB driver, plus the two constructors this script takes from it.
var mongo = require('mongodb');
var Server = mongo.Server;
var Db = mongo.Db;

// Database name, login credentials and the collection the script works with.
var db_name = 'test';
var db_user = 'root';
var db_pass = '111111';
var table_name = 'douban';

// Server description for localhost:27017 (auto_reconnect option set) and
// the Db handle for `db_name` built on top of it.
var server = new Server('localhost', 27017, { auto_reconnect: true });
var db = new Db(db_name, server);
// Node's built-in HTTP client module.
var http = require('http');

// Channel number that gets appended to the playlist request path.
var channel = 1;

// Request options for the douban.fm playlist endpoint (passed to http.get).
var options = {
  host: 'douban.fm',
  port: 80,
  path: '/j/mine/playlist?type=n&channel='.concat(channel)
};

// NOTE(review): presumably the polling period in milliseconds for the
// setInterval timer further down; the call's delay argument is not visible here.
var interval = 3000;

// Counter starting at zero; where it is incremented is not visible in this section.
var globalIndex = 0;
db.open(function(err, db) {
if(!err) {
db.authenticate(db_user, db_pass, function(err, result) {
db.collection(table_name, function(err, collection) {
var timer = setInterval(function(){
sigleIndex = 0;
http.get(options,function(res){
var buffers = [];
res.on('data', function(buffer) {
buffers.push(buffer);
});
res.on('end', function() {
var html = buffers.join('');
var data = JSON.parse( html );
var song = data.song;
for(var i=0;i |
|
|