LXY3800 发表于 2017-5-19 07:51:47

Hive Job log解析——perl脚本

  Hive Job log解析
  cat parseHiveJobLog.pl

# Parse a Hive job history log and dump the collected query/task status.
#
# Usage: perl <script> filename
#
# Each log line is treated as "<RecordType> <info>", split at the first
# space.  For QueryStart/QueryEnd and TaskStart/TaskEnd records (matched
# case-insensitively) <info> is scanned for KEY="value" pairs:
#   * query records are stored as $QueryStatus{QUERY_ID}{KEY} = value
#   * task records are stored as  $TaskStatus{QUERY_ID}{TASK_ID}{KEY} = value,
#     with TASK_COUNTERS further split into name => value pairs.
# All other record types are ignored.  Both hashes are printed with
# Data::Dumper once the whole file has been read.
use strict;
use warnings;
use Data::Dumper;

# The command-line arguments live in @ARGV; the scalar $ARGV is a different
# variable (the name of the file currently read by <>), so test $ARGV[0].
if (!defined($ARGV[0]) or $ARGV[0] eq '')
{
    print "useage: perl parseJobHistory.pl filename\n";
    exit 1;
}
my $filename = $ARGV[0];
my %QueryStatus;
my %TaskStatus;

# Three-argument open with a lexical handle; a log that cannot be opened is
# fatal, since there would be nothing to parse.
open(my $log_fh, '<', $filename) or die "can't open $filename: $!\n";
while (my $line = <$log_fh>)
{
    chomp $line;

    # Split "<type> <info>" at the first space.  A line without any space
    # cannot carry a record type plus fields, so it is skipped (index()
    # returns -1, which would otherwise make substr() drop the last char).
    my $spacepos = index($line, ' ');
    next if $spacepos < 0;
    my $type = lc substr($line, 0, $spacepos);
    my $info = substr($line, $spacepos + 1);

    if ($type eq 'querystart' || $type eq 'queryend')
    {
        my %fields = parse_record_fields($info);

        # A missing QUERY_ID is filed under the empty-string key, which is
        # what using undef as a hash key amounts to.
        my $queryid = defined $fields{'QUERY_ID'} ? $fields{'QUERY_ID'} : '';
        foreach my $key (keys %fields)
        {
            next if $key eq 'QUERY_ID';
            $QueryStatus{$queryid}{$key} = $fields{$key};
        }
    }
    elsif ($type eq 'taskstart' || $type eq 'taskend')
    {
        my %fields = parse_record_fields($info);
        my $queryid = defined $fields{'QUERY_ID'} ? $fields{'QUERY_ID'} : '';
        my $taskid  = defined $fields{'TASK_ID'}  ? $fields{'TASK_ID'}  : '';
        foreach my $key (keys %fields)
        {
            next if $key eq 'QUERY_ID' || $key eq 'TASK_ID';
            if ($key eq 'TASK_COUNTERS')
            {
                # Counters are stored one level deeper, keyed by counter name.
                my %taskcounters = parse_task_counters($fields{$key});
                foreach my $ckey (keys %taskcounters)
                {
                    $TaskStatus{$queryid}{$taskid}{$key}{$ckey} = $taskcounters{$ckey};
                }
            }
            else
            {
                $TaskStatus{$queryid}{$taskid}{$key} = $fields{$key};
            }
        }
    }
}
close($log_fh);

# Pass references so each hash is dumped as one nested structure instead of
# being flattened into a list of separate $VARn values.
print Dumper(\%QueryStatus);
print Dumper(\%TaskStatus);

# Extract KEY="value" pairs from the info part of a log record.
#   $info   - everything after the record type on the log line
#   returns - a hash (as a list) of KEY => value; a later duplicate key
#             overwrites an earlier one
sub parse_record_fields
{
    my ($info) = @_;
    my %fields;

    # Drop the space in every `:" ` sequence before scanning.
    # NOTE(review): presumably this keeps a value that itself contains
    # `:" ` from being cut short by the `"( |$)` terminator below - confirm
    # against real log lines.
    $info =~ s/:" /:"/g;

    # Consume one KEY="value" pair per iteration; the pair must be followed
    # by a space or the end of the string.  Works on the local copy only.
    while ($info =~ s/(\w+?)="(.*?)"( |$)//)
    {
        $fields{$1} = $2;
    }
    return %fields;
}

# Split a TASK_COUNTERS value of the form "name:value,name:value,...".
#   $counters - the raw TASK_COUNTERS string
#   returns   - a hash (as a list) of counter name => counter value
sub parse_task_counters
{
    my ($counters) = @_;
    my %counter_of;

    # Consume one "name:value" item per iteration, terminated by a comma or
    # the end of the string.  Works on the local copy only.
    while ($counters =~ s/(.*?):(.*?)(,|$)//)
    {
        $counter_of{$1} = $2;
    }
    return %counter_of;
}
页: [1]
查看完整版本: Hive Job log解析——perl脚本