用Python将多个日志文件整合成一个

zhu894532094 · 发表于 2017-5-5 11:58:47

项目架构发生了变化，原来的单机单应用要部署成集群了，这样排查问题看日志就不太方便了。最近想到可以将日志按用户打印，也就是每个用户一个日志文件（整个系统最多一万个用户，这样做还是没有问题的），再通过一个程序将分布在多台机器上的日志文件下载到本地，然后按日志内容（每一行有个时间戳）做一次排序。
上班的地方上不了外网，所以程序是在我自己的电脑上调试的。本地的环境是 Python 2.7，开发工具是 Eclipse + PyDev；我习惯了使用 Eclipse，用这个开发工具调试 Python 很方便。
   程序的主要逻辑分三步：
   第一步：用ftp下载目标日志文件到本地，目标日志可能分布在不同的ip上。
   第二步：将多个日志的内容装到dict对象。日志的时间戳是键，以该时间戳开头的那条日志（包括后面不带时间戳的续行）是值。然后按键排序。
   第三步：输出到指定文件

第一次写python程序，代码不太优美：

#!/bin/env python
#coding=gbk
import sys
import os
import re
import datetime
from ftplib import FTP

'''将一个用户的多个日志进行排序，排序后输出一个日志'''
'''运行方式 python getUserLog TNT3.TNT301
即：python命令 程序文件名 机构.用户号'''

# Download the target user's log file over FTP.
def ftp_down():
    """Download ``<bankuserid>.log`` from the FTP server.

    Uses the module-level settings ``ftpip``, ``userid``, ``pswd`` and
    ``bankuserid``. The remote file is stored under the same name in the
    current working directory.

    Any error raised while connecting, logging in or transferring is
    propagated to the caller after the connection has been closed.
    """
    ftp = FTP()
    ftp.set_debuglevel(0)
    try:
        ftp.connect(ftpip, 21)
        ftp.login(userid, pswd)
        # FTP.dir() prints the listing itself and returns None, so wrapping
        # it in a print would only add a stray "None" line.
        ftp.dir()
        filename = bankuserid + '.log'
        # The with-statement closes the local file; no explicit close needed.
        with open(filename, 'wb') as file_handler:
            ftp.retrbinary('RETR ' + filename, file_handler.write)
    except Exception:
        # Drop the connection without the QUIT handshake so the socket does
        # not leak, then let the caller see the original error.
        ftp.close()
        raise
    ftp.quit()
    print('从 '+ftpip+'下载 '+filename+' 成功')

# Load the target user's log files found under the configured path into a dict.
def loadFiles(logPath, logFilesList):
    """Collect the log entries of the target user from all matching files.

    A file is used when ``logPath + name`` is a regular file and the
    module-level ``bankuserid`` occurs in its name.

    An entry starts at a line beginning with a timestamp of the form
    ``YYYY-MM-DD hh:mm:ss.mmm`` and includes every following line up to the
    next such line. Lines that precede the first timestamp of a file are
    dropped.

    Args:
        logPath: directory prefix that is concatenated with each file name.
        logFilesList: file names (relative to ``logPath``) to consider.

    Returns:
        dict mapping ``"<timestamp>.<sequence>"`` to the full text of the
        entry. The sequence number is zero-padded by ``seq`` and counts
        entries across all files, so entries sharing a timestamp never
        overwrite each other and keep their load order when sorted by key.
    """
    # Compiled once instead of once per line read.
    timestamp_re = re.compile(
        r'^([0-9]{4}-[0-9]{2}-[0-9]{2}[ \t\n\r\v]'
        r'[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3})')

    userLogFileName = []
    for userFilePath in logFilesList:
        fullPath = logPath + userFilePath
        if os.path.isfile(fullPath) and bankuserid in userFilePath:
            userLogFileName.append(fullPath)
            print('将'+userFilePath+'放入了列表')

    userLogMap = {}
    # Shared by all files: a per-file counter would let two files produce
    # the same key for equal timestamps.
    entry_count = 0
    for ulfn in userLogFileName:
        # NOTE (original author): open() reportedly failed for file names
        # containing "-"; the cause was not investigated.
        with open(ulfn, 'rb') as tempFile:
            entry_key = ''
            entry_lines = []
            for line in tempFile:
                matched = timestamp_re.match(line)
                if matched:
                    # A new entry begins: store the one collected so far.
                    if entry_key:
                        entry_count += 1
                        suffix = seq(str(entry_count), 7)
                        userLogMap[entry_key+'.'+suffix] = ''.join(entry_lines)
                    entry_key = matched.group(1)
                    entry_lines = [line]
                else:
                    # Continuation line (e.g. a stack trace) of the entry.
                    entry_lines.append(line)
            # Store the last entry of the file with the same key scheme as
            # every other entry so it sorts consistently.
            if entry_key:
                entry_count += 1
                suffix = seq(str(entry_count), 7)
                userLogMap[entry_key+'.'+suffix] = ''.join(entry_lines)

    print('加载日志内容到dic成功。一共有 %d 个时间戳' % len(userLogMap))
    return userLogMap

# Sort the (key, value) pairs of a mapping in ascending key order.
def sortColl(arg1):
    """Return the items of ``arg1`` as a list of tuples sorted by key.

    ``arg1`` must provide ``iteritems()`` (a Python 2 dict does). The result
    is a list of ``(key, value)`` tuples in ascending key order.
    """
    pairs = list(arg1.iteritems())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
# arg1 is the sorted sequence of (key, text) pairs; arg2 is the result file path.
def write2file(arg1, arg2):
    """Write the text part of every pair in ``arg1`` to the file ``arg2``.

    Args:
        arg1: iterable of pairs; only element ``[1]`` of each pair is
            written, in iteration order.
        arg2: path of the output file. It is opened in ``'wb+'`` mode, so an
            existing file is truncated.
    """
    # The with-statement closes the file; the original trailing
    # ``tempFile.close`` lacked parentheses and never did anything.
    with open(arg2, 'wb+') as tempFile:
        tempFile.writelines(row[1] for row in arg1)
    # Report success only after the file has been flushed and closed.
    print('生成'+arg2+'成功')
# Pad seqStr on the left with '0' until it is seqLen characters long.
def seq(seqStr, seqLen):
    """Return ``seqStr`` left-padded with ``'0'`` to ``seqLen`` characters.

    A string that is already ``seqLen`` characters or longer is returned
    unchanged. (The earlier recursive version compared with ``>`` and so
    padded to ``seqLen + 1`` characters.)
    """
    return seqStr.rjust(seqLen, '0')

# Initial configuration: log directory, FTP account and target user.
logPath = r'E:/python code/'
ftpip = '192.168.1.100'  # IP address of the FTP server
userid = 'Feng'  # FTP user name
pswd = 'soft'  # FTP password (NOTE(review): hard-coded credential)
bankuserid = 'TNT3.TNT301'  # target user, institution and user number, e.g. TNT3.TNT301
# Disabled by the original author: read the target user from the command line.
# argCount = len(sys.argv)
# # exit when no user id was passed
# if argCount<2:
#     print('请输入用户编号')
#     sys.exit()
# else:
#     bankuserid = sys.argv[1]
#     print(bankuserid)


def _elapsed_ms(start, end):
    """Return the whole milliseconds between two ``datetime`` values."""
    # Subtracting full datetimes stays correct across second boundaries,
    # unlike comparing the ``.microsecond`` fields alone.
    return int((end - start).total_seconds() * 1000)


logFilesList = os.listdir(logPath)
print('%s 目录下的文件有：共 %d 个' % (logPath, len(logFilesList)))
# Run the pipeline: download, load, sort, write.
time_ftp_start = datetime.datetime.now()
print('开始用ftp下载')
# The FTP download is currently switched off; the log files are expected to
# be present under logPath already.
#ftp_down()
time_ftp_end = datetime.datetime.now()
print('ftp下载结束,耗时 %d ms。开始加载到dic' % _elapsed_ms(time_ftp_start, time_ftp_end))
userLogMap = loadFiles(logPath, logFilesList)
time_load_end = datetime.datetime.now()
print('加载到dic结束,耗时 %d ms。开始排序' % _elapsed_ms(time_ftp_end, time_load_end))
userLogMap = sortColl(userLogMap)
time_sort_end = datetime.datetime.now()
print('排序结束,耗时 %d ms。开始输出到磁盘' % _elapsed_ms(time_load_end, time_sort_end))
write2file(userLogMap, r'c:\result.log')
time_write_end = datetime.datetime.now()
# Report the write phase itself (the sort duration was repeated here before).
print('输出到磁盘结束,耗时 %d ms' % _elapsed_ms(time_sort_end, time_write_end))

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] 用Python将多个日志文件整合成一个

浏览过的版块

扫码加入运维网微信交流群