zi663227 发表于 2017-2-21 11:29:44

NodeJS Server 进程自动重启

  背景:
  NodeJS的特点是单进程,事件驱动,非阻塞式IO编程,当主进程抛异常挂掉,整个NodeJS Server就会停止。
  因此对当前的NodeJS进程进行监控非常重要:当NodeJS进程停止时,应能在指定时间内重新启动,继续提供服务。
  思路:
  1.起一个守护进程,用于与各子进程(虚拟进程)进行心跳通信,守护进程监测子进程是否有回应,若连续三次没有回应,则将该子进程重启。
  2.子进程与守护进程进行心跳通信,若守护进程不存在,则子进程自动退出。
  示例图:
  

  守护进程:bootstrap.js

/**
 * @author wubocao
 * Entry-point module that boots the node program.
 * 1: switches the current working directory (cwd) to the directory of the main script
 * 2: starts one daemon per requested module, each of which runs a main service
 * 3: listens for shutdown events, stops the main services and exits
 *
 * Usage: node bootstrap.js -m main1.js -m main2.js
 * Several modules may also be given in one flag, separated by "|".
 */
console.log("start bootstrap");
var path = require("path");
var addDeamon = require("./deamon.js").addDeamon;
// Keep the directory in its own variable so the `path` module binding is not overwritten.
var file = require.main.filename, baseDir = path.dirname(file);
process.chdir(baseDir);
var modulesNames = [], args = [], deamons = [];
if (process.argv && process.argv.length) {
	// NOTE(review): the scan starts at index 0, so the node executable and the
	// script path are forwarded to the children as well - confirm this is intended.
	for (var i = 0, len = process.argv.length; i < len; i++) {
		var arg = process.argv[i];
		if (arg == '-m') {
			// The value following -m names one or more modules separated by "|".
			var names = process.argv[++i];
			if (names) {
				modulesNames = modulesNames.concat(names.split("|"));
			}
		} else if (arg == '-ppid') {
			// Drop the -ppid flag together with its value.
			i++;
			continue;
		} else {
			// Everything else is passed through to the child processes.
			args.push(arg);
		}
	}
}
if (modulesNames.length == 0) {
	// Nothing to run. A default module could be pushed onto modulesNames here instead.
	console.log('please defined the modules like: node bootstrap.js -m main1.js -m main2.js');
} else {
	console.log(modulesNames);
	modulesNames.forEach(function(moduleName) {
		deamons.push(addDeamon(moduleName, args));
	});
	// Stop every supervised child when this process exits.
	process.on("exit", function() {
		console.log("parent exit");
		deamons.forEach(function(deamon) {
			deamon.stop();
		});
	});
	// SIGQUIT is the explicit shutdown request: stop the children, then exit cleanly.
	process.on("SIGQUIT", function() {
		console.log("request for exit");
		deamons.forEach(function(deamon) {
			deamon.stop();
		});
		process.exit(0);
	});
}
  守护进程新建一个或者多个daemon对象,每一个daemon启动一个新的业务进程:deamon.js

/**
* @author wubocao
* 守护进程模块
* 使用addDeamon(model,args,option)来添加一个守护进程
* 该函数返回一个守护进程对象,通过调用该对象的stop和init来停止和重新启动该进程
*
*/
var cp = require("child_process");
var util = require("util");
/**
 * Deep-copies a value built from plain objects, arrays and primitives.
 *
 * Arrays are copied index by index and objects key by key (every enumerable
 * key visited by for...in). Primitives, functions, null and undefined are
 * returned unchanged. Circular references are preserved: a container that was
 * already copied during this call is reused instead of being copied again.
 *
 * @param {*} obj value to copy
 * @param {Array} [stack] internal list of [original, copy] pairs already seen
 * @returns {*} the copy
 */
function copyObj(obj, stack) {
	stack = stack || [];
	if (obj == null || typeof obj != "object") {
		return obj;
	}
	// Reuse the copy made earlier for this container (handles cycles).
	var known = copyObj.getStack(obj, stack);
	if (known) {
		return known;
	}
	var t;
	if (Array.isArray(obj)) {
		var len = obj.length;
		t = new Array(len);
		// Register the pair before descending so self-references resolve to t.
		stack.push([ obj, t ]);
		for ( var i = 0; i < len; i++) {
			t[i] = copyObj(obj[i], stack);
		}
	} else {
		t = {};
		stack.push([ obj, t ]);
		for ( var k in obj) {
			t[k] = copyObj(obj[k], stack);
		}
	}
	return t;
}
/**
 * Looks up the copy already created for `obj`.
 *
 * @param {Object} obj original container
 * @param {Array} stack list of [original, copy] pairs
 * @returns {Object|null} the recorded copy, or null when obj was not seen yet
 */
copyObj.getStack = function(obj, stack) {
	for ( var i = stack.length; i--;) {
		if (stack[i][0] === obj) {
			return stack[i][1];
		}
	}
	return null;
};
/**
 * Daemon object: stores the module path, arguments and options, then calls
 * init() right away.
 *
 * @param {string} model  module path handed to the child-process fork
 * @param {Array|*} [args] arguments for the child; an array is deep-copied,
 *                         any other truthy value is wrapped in a one-element array
 * @param {Object} [option] fork options; deep-copied. `timeout` is also read by init()
 * @throws {Error} when model is not a non-empty string
 */
function deamon(model, args, option) {
	if (!model || typeof model != "string") {
		throw new Error("illegal model argument");
	}
	var __args;
	if (args) {
		__args = Array.isArray(args) ? copyObj(args) : [ args ];
	}
	// typeof null is "object": guard against it so __opt is always an object.
	var __opt = (option && typeof option == "object") ? copyObj(option) : {};
	this.__model = model;
	this.__args = __args;
	this.__opt = __opt || {};
	this.__cpr = null; // current child process handle
	this.__cprid = 0; // pid of the current child process
	this.__heartbeat = 0; // heartbeat interval timer
	this.init();
}
/**
 * Methods shared by all daemon objects: starting, stopping and
 * heartbeat-monitoring one forked child process.
 */
deamon.prototype = {
	/**
	 * Forks the child process (no-op when one is already attached), wires up
	 * its event handlers and starts the heartbeat monitor.
	 * A child that exits on its own is forked again; when the `timeout` option
	 * is a positive number, that only happens while less than `timeout` ms
	 * have passed since init() was called.
	 */
	init : function() {
		if (this.__cpr) {
			return;
		}
		this.__kill = false;
		console.log("deamon init");
		var exeTime = this.__opt.timeout;
		var start = new Date().getTime();
		var context = this;
		(function run() {
			console.log("process start");
			var child = cp.fork(context.__model, context.__args, context.__opt);
			context.__cpr = child;
			context.__cprid = child.pid;
			// Without an 'error' listener a failed spawn or a failed send()
			// would be thrown as an uncaught exception and kill the supervisor.
			child.on("error", function(e) {
				console.error("child process error:", e);
			});
			child.on("exit", function(e) {
				console.log("process exit");
				// Ignore exits we asked for, and late exits of a child that
				// has already been replaced (avoids forking a duplicate).
				if (context.__kill || context.__cpr !== child) {
					return;
				}
				if (exeTime > 0) {
					var end = new Date().getTime();
					if (end - start < exeTime) {
						run();
					} else {
						// NOTE(review): the heartbeat keeps running here and will
						// call init() again after three missed checks - confirm
						// whether that is intended once the timeout has passed.
						context.__cpr = null;
						context.__cprid = 0;
					}
				} else {
					run();
				}
			});
			child.on("message", function(message) {
				if (typeof message == "object") {
					switch (message.name) {
					case "proccessInfo":// process info (heartbeat reply)
						context.__messageCall && context.__messageCall(message.value);
						break;
					case "broadcast":// echo the payload back to the sending child
						try {
							child.send(message.value);
						} catch (e) {
							console.error("broadcast message error:", e);
						}
						break;
					}
				}
			});
		})();
		// start monitoring the heartbeat
		this.startHeartbeat();
	},
	/**
	 * Stops the heartbeat monitor and asks the child to quit (IPC disconnect
	 * plus SIGQUIT). The child is not restarted until init() is called again.
	 */
	stop : function() {
		// Otherwise the monitor would call init() again after three missed checks.
		this.stopHeartbeat();
		var child = this.__cpr;
		if (child) {
			console.log("deamon stop");
			this.__kill = true;
			// disconnect() on a closed channel emits an 'error' event.
			if (child.connected) {
				child.disconnect();
			}
			child.kill('SIGQUIT');
			this.__cpr = null;
			this.__cprid = 0;
		}
	},
	/**
	 * Stops the heartbeat monitor and kills the child with SIGKILL.
	 */
	stopForce : function() {
		this.stopHeartbeat();
		var child = this.__cpr;
		if (child) {
			console.log("deamon stop force");
			this.__kill = true;
			try {
				// Same effect as the shell command `kill -9 <pid>`, without a shell.
				child.kill('SIGKILL');
			} catch (e) {
				console.error("force kill error:", e);
			}
			this.__cpr = null;
			this.__cprid = 0;
		}
	},
	/**
	 * Sends a "proccessInfo" request to the child.
	 *
	 * @param {Function} callback called with the `value` of the child's
	 *                            "proccessInfo" reply
	 * @param {string} [msg] optional note, logged and sent along with the request
	 */
	getInfo : function(callback, msg) {
		if (this.__cpr) {
			this.__messageCall = callback;
			try {
				if (msg) {
					console.log("try get child process info with message[" + msg + "]");
				}
				this.__cpr.send({
					name : "proccessInfo",
					msg : msg || ""
				});
			} catch (e) {
				console.error("send message 'proccessInfo' error:", e);
			}
		} else {
			console.error("no child process when get child process info");
		}
	},
	/**
	 * Starts the heartbeat monitor: every 5 seconds the child is asked for
	 * its info and given 1500 ms to answer. After 3 consecutive missed
	 * answers the child is force-killed and init() is called 1 second later.
	 */
	startHeartbeat : function() {
		var deamon = this;
		// stop any previous heartbeat first
		this.stopHeartbeat();
		// number of consecutive failed heartbeat checks
		var times = 0;
		function checkDeamon() {
			// wait 1500 ms for the reply; no reply counts as one failure
			deamon.__hbTimer = setTimeout(function() {
				deamon.__hbTimer = 0;
				times++;
				if (times >= 3) {
					console.log("heart check with no response more then 3 times,restart now");
					times = 0;
					deamon.stopHeartbeat();
					deamon.stopForce();
					// Stored on the instance so stop() can cancel the pending restart.
					deamon.__restartTimer = setTimeout(function() {
						deamon.__restartTimer = 0;
						deamon.init();
					}, 1000);
				}
			}, 1500);
			deamon.getInfo(function(memInfo) {
				// the child answered: cancel the failure timer, reset the counter
				if (deamon.__hbTimer) {
					clearTimeout(deamon.__hbTimer);
					deamon.__hbTimer = 0;
				}
				times = 0;
			}, times > 0 ? "retry with times:" + times : "");
		}
		// check every 5 seconds
		this.__heartbeat = setInterval(checkDeamon, 5000);
	},
	/**
	 * Stops the heartbeat monitor and cancels any pending reply-timeout or
	 * restart timer it created.
	 */
	stopHeartbeat : function() {
		if (this.__heartbeat) {
			clearInterval(this.__heartbeat);
		}
		this.__heartbeat = 0;
		if (this.__hbTimer) {
			clearTimeout(this.__hbTimer);
			this.__hbTimer = 0;
		}
		if (this.__restartTimer) {
			clearTimeout(this.__restartTimer);
			this.__restartTimer = 0;
		}
	}
};
exports.addDeamon = function(model, args, option) {
args = args || [];
// 过滤掉ppid参数
for ( var i = 0, len = args.length; i < len; i++) {
if (args == '-ppid') {
i++;
}
}
return new deamon(model, args.concat([ '-ppid', process.pid ]), option);
}
  监控进程monitor.js,此JS由业务JS引入,用于和daemon进行心跳通信,确保进程是活动进程:

require('./monitor/module_listener.js');
/**
 * Child-side monitor. When the process was started with a "-ppid" argument
 * it answers the parent's "proccessInfo" requests and runs its own heartbeat
 * towards the parent, exiting when the parent stops answering.
 */
(function() {
	// Only start the heartbeat when -ppid is present (i.e. started by the parent).
	if (process.argv && process.argv.length) {
		for ( var i = 0, len = process.argv.length; i < len; i++) {
			if (process.argv[i] == '-ppid') {
				console.log('startHB');
				startHB();
				break;
			}
		}
	}
	/**
	 * Registers the signal/IPC handlers and starts the 5-second heartbeat.
	 */
	function startHB() {
		// process.send only exists when the process was spawned with an IPC channel.
		if (typeof process.send != "function") {
			console.error("no IPC channel to parent, heartbeat not started");
			return;
		}
		// number of consecutive failed heartbeat checks, and the pending reply timer
		var times = 0, heartbeatTimer = 0;
		// exit request
		process.on("SIGQUIT", function() {
			console.log("request for exit");
			process.exit(0);
		});
		// the IPC channel to the parent was closed
		process.on("disconnect", function() {
			console.log("request for exit");
			process.exit(-1);
		});
		// messages from the parent
		process.on("message", function(message) {
			console.log('child receive msg: ' + message);
			if (typeof message == "object") {
				// parent heartbeat request: reply with the memory usage
				if (message.name == "proccessInfo") {
					process.send({
						name : "proccessInfo",
						value : process.memoryUsage()
					});
				}
			} else if (typeof message === "string") {
				switch (message) {
				case "heartbeat":// the parent echoed our heartbeat
					if (heartbeatTimer) {
						times = 0;
						clearTimeout(heartbeatTimer);
						heartbeatTimer = 0;
					}
					break;
				}
			}
		});
		// one heartbeat check against the parent
		function checkParent() {
			// wait 1500 ms for the echo; no echo counts as one failure
			heartbeatTimer = setTimeout(function() {
				times++;
				// reset the tracked timer, not an undeclared global
				heartbeatTimer = 0;
				if (times >= 3) {
					times = 0;
					console.log("heart check with no response more then 3 times,exit now");
					process.exit(-1);
				}
			}, 1500);
			times > 0 && console.log("try get parent heartbeat " + times + " times");
			// send the heartbeat
			process.send({
				name : "broadcast",
				value : "heartbeat"
			});
		}
		// check every 5 seconds
		setInterval(checkParent, 5000);
	}
})();
  业务示例:
  main1.js:

// Sample service 1: loads the monitor, then serves a fixed plain-text
// greeting over HTTP on port 8938.
var http = require('http');
console.log('init main1: pid = ' + process.pid);
require('./monitor.js');

// Answers every request with the same 200 text/plain body.
function handleRequest(req, res) {
	res.writeHead(200, {'Content-Type': 'text/plain'});
	res.end('Hello World, main1 \n');
}

var server = http.createServer(handleRequest);
server.listen('8938');
console.log('main1 server running at http://127.0.0.1:8938');
  main2.js

// Sample service 2: loads the monitor, then serves a fixed plain-text
// response (written in two chunks) over HTTP on port 8937.
var http = require('http');
console.log('init main2: pid = ' + process.pid);
require('./monitor.js');

// Answers every request with the same 200 text/plain body.
function handleRequest(req, res) {
	res.writeHead(200, {'Content-Type': 'text/plain'});
	res.write('afdfadfdafdas');
	res.end('Hello World main 2\n');
}

var server = http.createServer(handleRequest);
server.listen('8937');
console.log('main2 server running at http://127.0.0.1:8937');
  注意:需要在main1.js和main2.js中引入monitor.js(即 require('./monitor.js')),否则业务进程无法与守护进程进行心跳通信。
  启动进程:
  node bootstrap.js -m main1.js -m main2.js
  源码中,还包含了一个node.sh,用于管理start或者是restart, stop 等操作:
  sudo chmod +x node.sh
  ./node.sh bootstrap.js -m main1.js -m main2.js start
页: [1]
查看完整版本: NodeJS Server 进程自动重启