|
生产环境中使用了SSD磁盘,使用 smartctl -a /dev/sdb 可以查看该磁盘的各项属性值。当这些属性的 Value 和 Worst 等于或小于 Thresh 时就需要注意了,下面上脚本:
- #!/usr/bin/perl
-
- =head
- check ssd infomation
-
- usage: add "nagios ALL=(root) NOPASSWD: /usr/sbin/smartctl" to /etc/sudoers file;
- =cut
-
- use strict;
- use warnings;
- use Data::Dumper;
- use Getopt::Long;
-
- # Script-wide state shared by everything below.
- #   $result     - return value of GetOptions
- #   $device     - device name below /dev to inspect (--device, default "sdb")
- #   $h          - hash reference holding everything collected per device
- #   $debug      - true when --debug was given; enables diagnostic prints
- #   $start_time - epoch seconds when the script started
- #   $use_time   - elapsed run time, filled in before the result is printed
- my $result;
- my $device;
- my $h;
- my $debug;
- my $start_time = time;
- my $use_time   = 0.00;
-
- # SMART attribute IDs this check cares about, mapped to the names it reports.
- my %ssd_attribute = (
-     5   => "Reallocated_Sector_Ct",
-     184 => "End_to_End_Error_Detection_Count",
-     225 => "Raw_Read_Error_Rate",
-     232 => "Available_Reserver_Space",
-     233 => "Media_Wearout_Indicator",
-     9   => "Power_On_Hours",
- );
-
- # Command line: --device NAME selects the disk, --debug turns on tracing.
- $result = GetOptions(
-     "device=s" => \$device,
-     "debug"    => \$debug,
- );
-
- # Fall back to the defaults when an option is absent or has a false value.
- $device = 'sdb' unless $device;
- $debug  = 0     unless $debug;
-
- # Collect the SMART report for $device and parse it into $h->{$device}.
- #
- # Keys written under $h->{$device}:
- #   output, perfdata, status - result accumulators, reset here
- #   total_info               - raw smartctl output (stdout and stderr merged)
- #   match                    - 1 if the report had the expected layout, else 0
- #   info_section, smart_test_result, healt_result, smart_error_log_version
- #                            - pieces captured from the report (only when match is 1)
- #   healt->{ID}              - one hash per attribute row, keyed by attribute ID
- # The "healt" spelling is kept because the rest of the script reads these keys.
- #
- # Exits with status 3 when $device is not a plain device name.
-
- # $device is interpolated into a shell command that runs under sudo, so
- # refuse anything containing shell metacharacters or a ".." path component.
- if ( $device !~ m{\A[\w./:-]+\z} || $device =~ m{\.\.} ) {
-     print "UNKNOWN - invalid device name\n";
-     exit (3);
- }
-
- $h->{$device}->{output} = "";
- $h->{$device}->{perfdata} = "";
- $h->{$device}->{status} = 0;
- $h->{$device}->{total_info} = `sudo /usr/sbin/smartctl -a /dev/$device 2>&1`;
- # Backticks return undef when the command could not be started; treat that
- # as empty output so the match below fails cleanly instead of warning.
- $h->{$device}->{total_info} = "" unless defined $h->{$device}->{total_info};
-
- # Capture into lexicals right away: the per-line match inside the loop
- # would otherwise overwrite the numbered capture variables.
- if ( my ($info_section, $test_result, $attr_table, $log_version) =
-          $h->{$device}->{total_info} =~ m{===\s+START\s+OF\s+INFORMATION\s+SECTION\s+===(.*)===\s+START\s+OF\s+READ\s+SMART\s+DATA\s+SECTION\s+===\s+SMART\s+overall-health\s+self-assessment\s+test\s+result:\s+(\w+)[\d\D]+Vendor\s+Specific\s+SMART\s+Attributes\s+with\s+Thresholds([\d\D]+)SMART\s+Error\s+Log\s+Version:\s+(\d+)}is ) {
-     $h->{$device}->{info_section} = $info_section;
-     $h->{$device}->{smart_test_result} = $test_result;
-     $h->{$device}->{healt_result} = $attr_table;
-
-     # Column names of one attribute row, in the order the regex captures them.
-     my @attr_keys = qw(id attribute_name flag value worst thresh type updated when_failed raw_value);
-
-     print "************************************** get $device healt info sta **************************************\n" if $debug;
-     foreach my $line (split /\n/, $attr_table) {
-         # An attribute row is a numeric ID followed by nine whitespace-separated tokens.
-         my @field = $line =~ m{(\d+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)}i;
-         next unless @field;
-         print "ID:$field[0]\tATTRIBUTE_NAME:$field[1]\tFLAG:$field[2]\tVALUE:$field[3]\tWORST:$field[4]\tTHRESH:$field[5]\tTYPE:$field[6]\tUPDATED:$field[7]\tWHEN_FAILED:$field[8]\tRAW_VALUE:$field[9]\n" if $debug;
-         my %attr;
-         @attr{@attr_keys} = @field;
-         $h->{$device}->{healt}->{ $field[0] } = \%attr;
-     }
-     print "************************************** get $device healt info end **************************************\n" if $debug;
-
-     $h->{$device}->{smart_error_log_version} = $log_version;
-     $h->{$device}->{match} = 1;
- } else {
-     $h->{$device}->{match} = 0;
- }
-
- # Debug trace: print the smartctl command line and dump everything collected.
- # The backticks sit inside a double-quoted string, so they are printed
- # literally; nothing is executed here.
- if ($debug) {
-     print "runging..... `sudo /usr/sbin/smartctl -a /dev/$device 2>&1`\n";
-     print "\n\n-------------------------------- Dumper \$h sta --------------------------------\n";
-     print Dumper $h;
-     print "-------------------------------- Dumper \$h end --------------------------------\n\n";
- }
-
- $use_time = sprintf("%0.2f",time - $start_time);
- if ( (exists $h->{$device}->{match} && $h->{$device}->{match} == 0) ) {
- print "CRITICAL - smartctl get $device total info fail|status=1 time=$use_time\n";
- exit (2);
- } elsif ( ! exists $h->{$device}->{healt} ) {
- print "WARNING - smartctl get $device healt info fail|status=1 time=$use_time\n";
- exit (1);
- } else {
- while ( (my ($id,$id_hash)) = (each %{$h->{$device}->{healt}} ) ) {
- if ( !exists $ssd_attribute{$id}) {
- print "not exists \$ssd_attribute{\$id},now next\n" if $debug;
- next;
- }
- print "----------------------------- loop \$h->{\$device}->{healt} hash -----------------------------\n" if $debug;
- print $h->{$device}->{healt}->{$id}->{worst} . "\t" if $debug;
- print $h->{$device}->{healt}->{$id}->{value} . "\t" if $debug;
- print $h->{$device}->{healt}->{$id}->{thresh} . "\n" if $debug;
- if ($h->{$device}->{healt}->{$id}->{value} <= $h->{$device}->{healt}->{$id}->{thresh}){
- $h->{$device}->{output} .= "CRITICAL - $device " if ($h->{$device}->{output} eq "");
- $h->{$device}->{output} .= "id:$id attribute_name:" . $ssd_attribute{$id} . " value:" . $h->{$device}->{healt}->{$id}->{value} . " ";
- $h->{$device}->{status} = 2;
- print 'value
|
|
|