Python codecs模块清洗postfix maillog一例
#!/usr/bin/env python
# -*- coding: utf8 -*-
#author: zhangdonghong
#email: zhangdonghongemail@163.com
#date: 2014-11-13
import re
import sys
import codecs
import datetime
# Patterns are compiled once at import time because parser() applies them to
# every matching log line.
_TO_RE = re.compile(r"to=<([^>]*)")
# Stops at ',' as well as '[' so that "relay=none, ..." yields "none" rather
# than running on to the next '[' in the line.
_RELAY_RE = re.compile(r"relay=([^\[,]*)")
_DELAY_RE = re.compile(r"delay=([^,]*)")
_DELAYS_RE = re.compile(r"delays=([^,]*)")
_DSN_RE = re.compile(r"dsn=([^,]*)")
_STATUS_RE = re.compile(r"status=([^\s]*)")
_FROM_RE = re.compile(r"from=<([^>]*)")
_SIZE_RE = re.compile(r"size=([^,]*)")
_NRCPT_RE = re.compile(r"nrcpt=([^\s]*)")


def _first_match(pattern, line, default=''):
    """Return the first capture group of *pattern* in *line*, or *default*."""
    found = pattern.search(line)
    return found.group(1) if found else default


def _record_prefix(line):
    """Return ``[timestamp, queue_id]`` for *line*, or None if it is too short.

    The timestamp is the first three whitespace-separated tokens joined by a
    single space.
    """
    tokens = line.split()
    # NOTE(review): the token index for the queue id was missing in the
    # original code; 5 assumes the usual syslog layout
    # "Mon DD HH:MM:SS host postfix/xxx[pid]: QUEUEID: ..." -- confirm.
    if len(tokens) < 6:
        return None
    return [' '.join(tokens[:3]), tokens[5].strip(':')]


def _to_record(line):
    """Build the '|'-joined delivery record for a ``to=<`` line, or None."""
    prefix = _record_prefix(line)
    if prefix is None:
        return None
    recipient = _first_match(_TO_RE, line)
    # Take the domain from the recipient itself instead of the first '@'
    # anywhere in the line, so an '@' elsewhere cannot be picked up.
    domain = recipient.partition('@')[2]
    return '|'.join(prefix + [
        recipient,
        domain,
        _first_match(_RELAY_RE, line),
        _first_match(_DELAY_RE, line, '0'),
        _first_match(_DELAYS_RE, line, '0'),
        _first_match(_DSN_RE, line, '0'),
        _first_match(_STATUS_RE, line),
    ])


def _from_record(line):
    """Build the '|'-joined sender record for a ``from=<`` line, or None."""
    prefix = _record_prefix(line)
    if prefix is None:
        return None
    # An empty sender ("from=<>") is written as the literal 'null'.
    sender = _first_match(_FROM_RE, line) or 'null'
    return '|'.join(prefix + [
        sender,
        _first_match(_SIZE_RE, line, '0'),
        _first_match(_NRCPT_RE, line, '0'),
    ])


def parser(filename):
    """Extract delivery and sender records from a postfix mail log.

    Reads *filename* line by line and appends '|'-separated records to two
    files in the current working directory:

    * ``to_result.txt`` -- for lines containing ``postfix/smtp`` and ``to=<``:
      timestamp|queue_id|to|domain|relay|delay|delays|dsn|status
    * ``from_result.txt`` -- for lines containing ``from=<`` but neither
      ``status=expired`` nor ``postfix/pickup``:
      timestamp|queue_id|from|size|nrcpt

    Lines containing ``NOQUEUE`` are ignored, as are lines with fewer than six
    whitespace-separated tokens. Missing delay/delays/dsn/size/nrcpt values
    are written as "0"; other missing fields are written as empty strings.

    NOTE(review): the field list passed to ``'|'.join`` was missing in the
    original code; the order above follows the order in which the original
    extracted the values -- confirm against consumers of the result files.

    Both output files are opened (and therefore created if absent) as soon as
    the input file has been opened successfully.
    """
    # The input is opened first so a bad path fails before any output file is
    # created. It is opened as before, without an explicit encoding.
    with codecs.open(filename, 'r') as log, \
            open('to_result.txt', 'a') as to_out, \
            open('from_result.txt', 'a') as from_out:
        for line in log:
            if "NOQUEUE" in line:
                continue
            if "postfix/smtp" in line and "to=<" in line:
                record = _to_record(line)
                if record is not None:
                    to_out.write(record + "\n")
            if ("from=<" in line and "status=expired" not in line
                    and "postfix/pickup" not in line):
                record = _from_record(line)
                if record is not None:
                    from_out.write(record + "\n")
def main():
    """Run parser() on the file named by the single command-line argument.

    Prints a usage line instead when the number of arguments is not exactly
    one.
    """
    # sys.argv[0] is the script name, so one user-supplied argument means a
    # length of 2; the path itself is sys.argv[1], not the whole list.
    if len(sys.argv) == 2:
        parser(sys.argv[1])
    else:
        # Parenthesised single-argument form works on both Python 2 and 3.
        print('Usage: parse_to_file.py filename')
# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    # NOTE(review): nothing in the visible code reads these three
    # module-level names (parser() assigns its own locals); they are kept
    # in case code outside this view depends on them -- confirm and remove.
    tmp_from = ""
    tmp_to = ""
    now = datetime.datetime.now().strftime('%Y-%m-%d')
    main()
页:
[1]