import re
# Access-log summariser: groups requests by normalised URL and reports, per
# URL, the request count, the average of the number logged after ":7199 ",
# and a histogram of HTTP status codes.

# Runs of digits in a path are collapsed to "0" so that e.g. /users/12 and
# /users/345 are counted as the same URL.
_DIGITS_RE = re.compile(r"\d+")

# Request line of the methods that are aggregated.  The previous pattern used
# the character class "[POST|GET]", which matches ONE character from that set;
# it therefore also accepted PUT and DELETE by accident.  Those methods are
# listed explicitly so such lines keep being counted.
_REQUEST_RE = re.compile(r"(?:GET|POST|PUT|DELETE) (/[a-zA-Z0-9/]*)")

# HEAD requests: the path class has no digits, so the captured path stops at
# the first digit (unchanged from the original pattern).
# NOTE(review): possibly unintentional -- confirm before widening.
_HEAD_RE = re.compile(r"HEAD (/[a-zA-Z/]*)")

# Status code following the protocol token, e.g. HTTP/1.1" 200
_STATUS_RE = re.compile(r'HTTP/[0-9.]+" (\d*)')

# Number that follows ":7199 " on the line.
# NOTE(review): presumably an upstream port followed by a response time --
# confirm against the log format.
_TIME_RE = re.compile(r":7199 ([0-9.]*)")


def _parse_time(text):
    """Return *text* as a float, or 0.0 when it is empty or malformed."""
    try:
        return float(text)
    except ValueError:
        # "" (nothing after the port) or something like "1.2.3": count the
        # request but contribute nothing to the time total.
        return 0.0


def parse_line(line):
    """Extract ``(url, http_code, http_time)`` from one log line.

    Returns None for lines that must not be aggregated: OPTIONS requests and
    lines in which no supported request can be found (previously such lines
    crashed the script with a TypeError).

    ``url`` has digit runs replaced by "0" and always ends with "/".
    ``http_code`` is the status string, or None when no status is found.
    ``http_time`` is a number; 0 when the line carries no usable timing.
    """
    if "OPTIONS /" in line:
        return None

    match = _REQUEST_RE.search(line) or _HEAD_RE.search(line)
    if match is None:
        return None

    url = _DIGITS_RE.sub("0", match.group(1))
    if not url.endswith("/"):
        url += "/"

    status = _STATUS_RE.search(line)
    http_code = status.group(1) if status else None

    timing = _TIME_RE.search(line)
    http_time = _parse_time(timing.group(1)) if timing else 0

    return url, http_code, http_time


def aggregate(lines):
    """Fold an iterable of log lines into per-URL statistics.

    Returns a dict mapping each URL to a dict with the keys "url",
    "http_time" (sum of the timings), "http_count" (number of requests) and
    "http_code" (mapping of status code to number of occurrences).
    """
    stats = {}
    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        url, http_code, http_time = parsed

        entry = stats.get(url)
        if entry is None:
            entry = stats[url] = {
                "url": url,
                "http_time": 0,
                "http_count": 0,
                "http_code": {},
            }

        entry["http_count"] += 1
        entry["http_time"] += http_time
        codes = entry["http_code"]
        codes[http_code] = codes.get(http_code, 0) + 1
    return stats


def format_report(stats):
    """Return the report rows for *stats*, busiest URL first.

    Each row is "|url |count |average time |status histogram|".  The average
    divides the time total by ALL requests of the URL, including those that
    had no timing.
    """
    entries = sorted(
        stats.values(),
        key=lambda entry: entry["http_count"],
        reverse=True,
    )
    rows = []
    for entry in entries:
        total = entry["http_time"]
        # A zero total is reported as the integer 0 (not 0.0), which is what
        # the original script printed for URLs without any timing.
        average = total / entry["http_count"] if total else 0
        rows.append(
            "|%s |%s |%s |%s|"
            % (entry["url"], entry["http_count"], average, entry["http_code"])
        )
    return rows


def main(path="log"):
    """Read the log file at *path* and print the per-URL report."""
    # The context manager closes the file even if parsing raises.
    with open(path, "r") as log_file:
        stats = aggregate(log_file)
    for row in format_report(stats):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(row)


if __name__ == "__main__":
    main()