-
-
Save dmccombs/54a9ad2f650c9a5a64ac to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/dash
### | |
# ABOUT : collectd monitoring script for smartmontools (using smartctl) | |
# AUTHOR : Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012 | |
# LICENSE: GNU GPL v3 | |
# SOURCE: http://devel.dob.sk/collectd-scripts/ | |
# | |
# This script monitors SMART pre-fail attributes of disk drives using smartmon tools. | |
# Generates output suitable for Exec plugin of collectd. | |
### | |
# Requirements: | |
# * smartmontools installed (and smartctl binary) | |
# * User & group for collector:collector | |
# groupadd collector | |
# useradd -d /var/lib/collector -g collector -l -m -s /bin/sh collector | |
# * sudo entry for binary (ie. for sys account): | |
# collector ALL=(root) NOPASSWD:/usr/sbin/smartctl * | |
# * Configuration for collectd.conf | |
# LoadPlugin exec | |
# <Plugin exec> | |
# Exec "collector:collector" "/usr/local/bin/collectd-smartmon" "sda" "sdb" | |
# </Plugin> | |
### | |
# Parameters: | |
# <disk>[:<driver>,<id> ] ... | |
### | |
# Typical usage: | |
# /etc/collect/smartmon.sh "sda:megaraid,4" "sdb" | |
# | |
# Will monitor disk 4 of the megaraid adapter, mapped as /dev/sda, and additionally
# the normal disk /dev/sdb. See the smartctl manual for more info about adapter driver names.
### | |
# Typical output: | |
# PUTVAL <host>/smartmon-sda4/gauge-raw_read_error_rate interval=300 N:30320489 | |
# PUTVAL <host>/smartmon-sda4/gauge-spin_up_time interval=300 N:0 | |
# PUTVAL <host>/smartmon-sda4/gauge-reallocated_sector_count interval=300 N:472 | |
# PUTVAL <host>/smartmon-sda4/gauge-end_to_end_error interval=300 N:0 | |
# PUTVAL <host>/smartmon-sda4/gauge-reported_uncorrect interval=300 N:1140 | |
# PUTVAL <host>/smartmon-sda4/gauge-command_timeout interval=300 N:85900918876 | |
# PUTVAL <host>/smartmon-sda4/temperature-airflow interval=300 N:31 | |
# PUTVAL <host>/smartmon-sda4/temperature-temperature interval=300 N:31 | |
# PUTVAL <host>/smartmon-sda4/gauge-offline_uncorrectable interval=300 N:5 | |
# PUTVAL <host>/smartmon-sdb/gauge-raw_read_error_rate interval=300 N:0 | |
# PUTVAL <host>/smartmon-sdb/gauge-spin_up_time interval=300 N:4352 | |
# ... | |
# | |
# Monitoring additional attributes: | |
# To monitor an additional SMART attribute reported by smartctl, add one more
# output line for the corresponding SMART_<Attribute_Name> variable, where
# <Attribute_Name> is the attribute name printed by `smartctl -A` with dashes
# replaced by underscores. It's nothing complicated ;)
# | |
# History: | |
# 2012-04-17 v0.1.0 - public release | |
# 2012-09-03 v0.1.1 - fixed dash replacement (thx to R.Buehl) | |
# 2013-08-28 v0.2.0 - Fix sudo command. | |
# Use dash as it's lower overhead. | |
# Improve docs. | |
# Add a few metrics to output. | |
# Re-order & standardise output lines for easier review & updating. | |
# 2014-12-09 v0.2.1 - Fix basename calls. | |
### | |
# Refuse to start without at least one disk argument.
if [ -z "$*" ]; then
  echo "Usage: ${0##*/} <disk> <disk>..." >&2
  exit 1
fi

# Validate every requested disk up front so a typo fails immediately instead
# of silently producing no data. Only the part before ':' names the device.
for disk in "$@"; do
  disk=${disk%:*}
  if ! [ -e "/dev/$disk" ]; then
    echo "${0##*/}: disk /dev/$disk not found !" >&2
    exit 1
  fi
done

HOST=$(hostname -f)
INTERVAL=300

# Print one PUTVAL line, or nothing when the value is empty (the drive did
# not report that attribute).
#   $1 - plugin instance suffix (disk name plus optional adapter id)
#   $2 - type and type instance, e.g. "gauge-spin_up_time"
#   $3 - raw attribute value
# Reads the globals HOST and INTERVAL.
putval() {
  [ -n "$3" ] || return 0
  echo "PUTVAL $HOST/smartmon-$1/$2 interval=$INTERVAL N:$3"
}

# Query one disk with smartctl and print PUTVAL lines for the monitored
# attributes.
#   $1 - disk spec: <disk>[:<driver>[,<id>]]
# The body is a subshell on purpose: the SMART_* variables created by eval
# vanish when it ends, so an attribute missing on this disk can never be
# reported with a value left over from another disk or an earlier cycle.
report_disk() (
  spec=$1
  dsk=${spec%:*}
  drv=
  id=
  case $spec in
    *:*)
      drv=${spec#*:}
      # Only a driver spec of the form "<driver>,<id>" contributes an id
      # suffix; a plain driver such as "sat" must not leak into the
      # PUTVAL identifier.
      case $drv in
        *,*) id=${drv#*,} ;;
      esac
      ;;
  esac

  # Turn each attribute row into a SMART_<name>=<raw> assignment.
  # The name is limited to [a-zA-Z0-9_-] (dashes become underscores) and the
  # raw value to plain digits, so the text handed to eval can only ever be
  # simple assignments.
  # ${drv:+-d "$drv"} expands to the two words `-d <driver>` when a driver
  # was given and to nothing otherwise.
  eval "$(sudo /usr/sbin/smartctl ${drv:+-d "$drv"} -A "/dev/$dsk" |
    awk '$3 ~ /^0x/ && $2 ~ /^[a-zA-Z0-9_-]+$/ && $10 ~ /^[0-9]+$/ {
      gsub(/-/, "_", $2)
      print "SMART_" $2 "=" $10
    }' 2>/dev/null)"

  inst=$dsk$id
  putval "$inst" gauge-command_timeout "$SMART_Command_Timeout"
  putval "$inst" gauge-current_pending_sector "$SMART_Current_Pending_Sector"
  putval "$inst" gauge-end_to_end_error "$SMART_End_to_End_Error"
  putval "$inst" gauge-hardware_ecc_recovered "$SMART_Hardware_ECC_Recovered"
  putval "$inst" gauge-offline_uncorrectable "$SMART_Offline_Uncorrectable"
  putval "$inst" gauge-raw_read_error_rate "$SMART_Raw_Read_Error_Rate"
  putval "$inst" gauge-reallocated_sector_count "$SMART_Reallocated_Sector_Ct"
  putval "$inst" gauge-reported_uncorrect "$SMART_Reported_Uncorrect"
  putval "$inst" gauge-spin_up_time "$SMART_Spin_Up_Time"
  putval "$inst" temperature-airflow "$SMART_Airflow_Temperature_Cel"
  putval "$inst" temperature-temperature "$SMART_Temperature_Celsius"
  # Was misspelled "guage-", unlike every other gauge line above.
  putval "$inst" gauge-media_wearout_indicator "$SMART_Media_Wearout_Indicator"
)

# Poll all disks forever, once per INTERVAL seconds.
while true; do
  for disk in "$@"; do
    report_disk "$disk"
  done
  # Keep looping even if sleep itself fails or is interrupted.
  sleep "$INTERVAL" || true
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment