#!/usr/local/bin/perl
# checks a list of servers, if they do not ping, sends a message
#
# Created 01-Jun-1998, Brandon Gillespie
# Modified 02-Jun-1998, Phil Humpherys
# -- fixed spelling error,started ip-only lookup.
# -- started uptime logger for clusters.
# Modified 03-Jun-1998, Brandon Gillespie
# -- changed it to use the exit status of ping
# -- finished setup @check to be IPs, mapping to names
# only for messages
# -- changed it to wait once on a dead ping response
# Modified 11-Jun-1998, Brandon Gillespie
# -- it will email out a msg now as well
# -- fixed bug where it would say a host was 'back up'
# when it had never reported it being down.
#
# Overview: after an optional startup delay the script calls readcfg(),
# then loops forever: each entry of @check is pinged once, per-host state
# is tracked in %dead / %deadt, mailmsg() is called on state changes, and
# checkpoint() is called after every pass before sleeping $interval seconds.
#
# Per-host state, as used by the main loop:
#   $dead{$host}  == 0  last ping succeeded (or host was just (re)loaded)
#                 == 1  one failed ping seen, nothing reported yet
#                 >  1  "not responding" has been reported
#   $deadt{$host}       time() of the first failure / of the last "STILL"
#                       message; 0 while the host is up
#
# $ping, $interval, $dead_interval, $checkpoint, $qpage, $qpage_server,
# $qpage_recip, $sendmail, $mailto, $mailfrom, @check and %ncheck are not
# assigned anywhere in the visible text -- presumably readcfg() sets them
# from $config; verify against an intact copy of the file.

## startup delay
## (first command-line argument, in seconds; skipped unless it is > 0)
if ($ARGV[0] > 0) {
    print "Waiting $ARGV[0] seconds before starting...\n";
    sleep($ARGV[0]);
}

$config = "/usr/local/etc/pingcheck.conf";

## SIGHUP runs sighup(), which calls readcfg() again
$SIG{HUP} = 'sighup';

## make STDOUT the default output handle and turn on autoflush for it
select(STDOUT);
$| = 1;

my %deadt = ();
my %dead = ();

## set to 1 when a host comes back up; consumed by checkpoint()
$rotate_checkpoint = 0;

&readcfg();
checkpoint();

for (;;) {
    for $host (@check) {
        ## Low 16 bits of the wait status of one ping (-c1).
        ## NOTE(review): "2>&1 > /dev/null" points stderr at the original
        ## stdout *before* stdout is sent to /dev/null, so ping's stderr is
        ## not discarded -- confirm whether that is intended.
        $rc = 0xffff & system("($ping -c1 $host) 2>&1 > /dev/null");

        ## display name used in messages
        $nhost = $ncheck{$host};

        if ($rc == 0) { ## it ran Ok
            if ($dead{$host} >= 1) {
                ## only announce recovery if the outage was actually
                ## reported (state > 1); a single missed ping is forgotten
                ## silently
                if ($dead{$host} > 1) {
                    mailmsg("$nhost is back up");
                    $rotate_checkpoint = 1;
                }
                $dead{$host} = 0;
                $deadt{$host} = 0;
            }
        } elsif ($rc > 0x80) { ## it did not run Ok
            ## The exit code lives in the high byte of the status, so every
            ## non-zero exit code lands in this branch.
            if ($dead{$host} > 1) {
                ## already reported: repeat at most once per $dead_interval
                if ((time() - $deadt{$host}) >= $dead_interval) {
                    ## "STILL" is mailed only while the counter is 2..5
                    if ($dead{$host} < 6) {
                        mailmsg("$nhost is STILL not responding!");
                        $deadt{$host} = time();
                    }
                    ## NOTE(review): once the counter reaches 6, $deadt is
                    ## no longer refreshed, so this increment then happens
                    ## on every pass -- harmless for the messages, but the
                    ## counter stops meaning "intervals elapsed".
                    $dead{$host}++;
                }
            } elsif ($dead{$host} == 1) {
                ## second consecutive failure: first report
                mailmsg("$nhost is not responding!");
                $dead{$host}++;
            } else {
                ## first failure: remember when, wait one more pass
                $deadt{$host} = time();
                $dead{$host} = 1;
            }
        } elsif ($rc == 0xff00) {
            ## NOTE(review): 0xff00 is greater than 0x80, so the branch
            ## above always wins and this die() can never run as written.
            ## Statuses 1..0x80 match no branch at all and are ignored.
            die("failed to exec $ping\n");
        }
    }
    checkpoint();
    sleep($interval);
}

# checkpoint()
#   Writes the current %dead and %deadt tables to /tmp/pingcheck.run, one
#   "dead: host => value" / "deadt: host => value" line per key, sorted by
#   key.  Does nothing unless $checkpoint is true.  If $rotate_checkpoint
#   is set, the previous file is first renamed to
#   /tmp/pingcheck.run.<time()> and the flag is cleared.
sub checkpoint {
    if (!$checkpoint) {
        return;
    }
    if ($rotate_checkpoint) {
        ## result of rename() is not checked
        rename("/tmp/pingcheck.run", ("/tmp/pingcheck.run." .
            time()));
        $rotate_checkpoint = 0;
    }
    my ($k, $v);
    ## NOTE(review): on failure this only prints a message to the selected
    ## handle and carries on; the print F calls below then target a handle
    ## that was never opened.
    open(F, ">/tmp/pingcheck.run") || print ("open(pingcheck.run): $!\n");
    for $k (sort(keys %dead)) {
        $v = $dead{$k};
        print F "dead: $k => $v\n";
    }
    for $k (sort(keys %deadt)) {
        $v = $deadt{$k};
        print F "deadt: $k => $v\n";
    }
    close(F);
}

# mailmsg($msg)
#   Reports $msg together with a timestamp: pages it through $qpage when
#   $qpage_server is set, then opens a pipe to $sendmail.  Dies if the
#   pipe cannot be opened.  What is written to the pipe is not visible
#   here (see the damage note below).
sub mailmsg {
    my $msg = $_[0];
    ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime();
    ## localtime() months are 0-based
    $mon++;
    ## zone label is hard-coded to US Mountain time
    if ($isdst) {
        $tz = "MDT";
    } else {
        $tz = "MST";
    }
    ## NOTE(review): $year from localtime() is "years since 1900" and is
    ## used unmodified, so this prints 98 for 1998 and 100 for 2000.
    $date = sprintf("$mon/$mday/$year $hour:%02d $tz", $min);
    if ($qpage_server) {
        ## list-form system(): no shell involved; exit status is ignored
        system($qpage, "-s", $qpage_server, "-p", $qpage_recip, "$msg $date");
    }
    open(MSG, "|$sendmail") || die "Unable to open sendmail: $!\n";
    ## NOTE(review): the source text is damaged at this point.  Everything
    ## between "print MSG<" and "2) {" appears to be missing -- presumably
    ## a here-document holding the mail text, the end of mailmsg, and the
    ## beginning of sub readcfg (whatever fills in @n and sets up $x).
    ## TODO: restore from an intact copy.  The fragment is kept exactly as
    ## found; it is not valid Perl as it stands.
    print MSG< 2) {
        $n = $n[0];
    } else {
        $n = join(".", @n);
    }
    ## record the display name $n under the key $check[$x]
    $ncheck{$check[$x]} = $n;
    }
    }

    ## Everything from here to the closing brace presumably belongs to
    ## readcfg (its header is in the missing text) -- confirm.

    ## this will drop anything we can't do an nslookup on
    @check = keys(%ncheck);

    ## every host starts out as "up"; note this also wipes any outage
    ## state already recorded when the config is re-read
    for $x (@check) {
        $dead{$x} = 0;
        $deadt{$x} = 0;
    }

    ## print a msg
    ## (localtime() is in scalar context here, giving a date string)
    print "[" . localtime() . "] Reading $config\n";
    print "Checking servers: " . join(", ", sort values %ncheck) . "\n";
    print "Using a $interval second interval.\n";
    print "Using group $qpage_recip at NPP server $qpage_server\n";
    print "Mailing to $mailto as $mailfrom\n";
    print "Checkpoint logfile: " . ($checkpoint ? "yes" : "no") . "\n";
}

# sighup()
#   SIGHUP handler (installed via $SIG{HUP} above): calls readcfg() again.
sub sighup {
    &readcfg();
}