File: //bigscoots/dedicated/monitor.sh
#!/bin/sh
# Resource and hardware monitor for a dedicated node. Each check below posts
# to the "#node-alerts" Slack channel through /bigscoots/general/slack.sh and
# then drops a marker file under /root/.bigscoots/counters so that the same
# check stays quiet while the marker exists.

# Search path for every external tool the script invokes.
PATH=/usr/lib64/ccache:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin:/root/bin

# MegaCli binary used by the RAID check.
MegaCli="/sbin/MegaCli64"

# Create the marker directory on first run.
[ -d /root/.bigscoots/counters ] || mkdir -p /root/.bigscoots/counters
# Root filesystem check: alert once / reaches 95% usage.
# The diskchk marker suppresses repeat alerts; a detached screen session
# deletes it again after 300 seconds.
if ! [ -f /root/.bigscoots/counters/diskchk ]; then
  # Fifth column of the df data row (the header row is filtered out).
  usageroot=$(df -Ph / | awk '!/Filesystem/ { print $5}')
  # Same value with everything from the "%" sign onwards cut off.
  userootp=$(printf '%s\n' "$usageroot" | cut -d'%' -f1)

  if [ "$userootp" -ge 95 ]; then
    touch /root/.bigscoots/counters/diskchk

    # First IPv4 address reported by ifconfig that is not 127.0.0.1.
    serverip=$(
      $(which ifconfig) \
        | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' \
        | grep -Eo '([0-9]*\.){3}[0-9]*' \
        | grep -v '127.0.0.1' \
        | head -1
    )

    bash /bigscoots/general/slack.sh "#node-alerts" ":warning: $(hostname) - ${serverip} - Disk Usage: / ${usageroot}"
    screen -dmS diskchk sh -c 'sleep 300 ; rm -f /root/.bigscoots/counters/diskchk'
  fi
fi
# Memory check: alert when column 7 of the "Mem" row of `free -g` drops
# below 4 (GB). The memchk marker suppresses repeat alerts; a detached screen
# session deletes it again after 300 seconds.
if [ ! -f /root/.bigscoots/counters/memchk ]; then
  availmem=$(free -g | awk '/Mem/ { print $7 }')

  # Only compare when the value is a plain integer; an empty or unexpected
  # value would otherwise make the numeric test print an error.
  case "$availmem" in
    '' | *[!0-9]*) availmem= ;;
  esac

  if [ -n "$availmem" ] && [ "$availmem" -lt 4 ]; then
    touch /root/.bigscoots/counters/memchk

    # serverip is only assigned by the disk-usage check when that alert
    # fires, so resolve it here when it is still empty; otherwise the
    # message would carry a blank address.
    if [ -z "$serverip" ]; then
      serverip=$(
        ifconfig 2>/dev/null \
          | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' \
          | grep -Eo '([0-9]*\.){3}[0-9]*' \
          | grep -v '127.0.0.1' \
          | head -1
      )
    fi

    bash /bigscoots/general/slack.sh "#node-alerts" ":warning: $(hostname) - ${serverip} - High Memory Usage: ${availmem}GB Free"
    screen -dmS memchk sh -c 'sleep 300 ; rm -f /root/.bigscoots/counters/memchk'
  fi
fi
# Power supply check: alert for every "Power Supply" sensor whose trailing
# state text is not exactly "Presence detected". Only runs when ipmitool
# reports more than one power supply line. The pduchk marker suppresses
# repeat alerts; a detached screen session deletes it after 300 seconds.
if [ ! -f /root/.bigscoots/counters/pduchk ]; then
  # Install ipmitool on first use; `rpm -q` queries the exact package name
  # and exits non-zero when it is not installed.
  if ! rpm -q ipmitool >/dev/null 2>&1; then
    yum -y install ipmitool
  fi

  # Query the sensors once and reuse the result. awk blanks fields 2-9 so
  # that each line is left with its first field followed by whatever comes
  # after field 9.
  psu_report=$(ipmitool sdr type "Power Supply" | awk '{$2=$3=$4=$5=$6=$7=$8=$9=""; print $0}')

  if [ "$(printf '%s\n' "$psu_report" | wc -l)" -gt 1 ]; then
    # read puts the first word in ps and the rest of the line in status.
    printf '%s\n' "$psu_report" | while read -r ps status; do
      if [ "$status" != 'Presence detected' ]; then
        touch /root/.bigscoots/counters/pduchk
        bash /bigscoots/general/slack.sh "#node-alerts" "Power Supply Status: \n :red_circle: $ps $status"
        screen -dmS pduchk sh -c 'sleep 300 ; rm -f /root/.bigscoots/counters/pduchk'
      fi
    done
  fi
fi
# RAID check for LSI/Broadcom storage controllers: installs MegaCli and the
# lsi.sh helper when missing, then alerts if any logical drive reports
# fail/degrad/error. The raidcheck marker suppresses repeat alerts; a
# detached screen session deletes it after 300 seconds.
# The cheap marker test runs first so lshw is skipped while the marker exists.
if [ ! -f /root/.bigscoots/counters/raidcheck ] && lshw -C storage | grep -Eq 'LSI|Broadcom'; then
  if [ ! -f /sbin/MegaCli64 ]; then
    mkdir -p /tmp/lsi
    # Subshell keeps the directory change local. The archive is saved inside
    # /tmp/lsi so that the unzip below actually finds it.
    (
      cd /tmp/lsi || exit 1
      wget -O MegaCLI.zip "https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/8-07-14_MegaCLI.zip" || exit 1
      # -o: overwrite leftovers from an earlier attempt instead of prompting.
      unzip -o ./MegaCLI.zip || exit 1
      rpm -ivh ./*inux/MegaCli-*.noarch.rpm
    )
    # -f: do not fail when one of the links is already present.
    ln -sf /opt/MegaRAID/MegaCli/MegaCli64 /sbin/
    ln -sf /opt/MegaRAID/MegaCli/MegaCli64 /usr/local/sbin/
  fi

  if [ ! -f /root/lsi.sh ]; then
    (
      cd /root || exit 1
      wget -O lsi.zip "https://www.bigscoots.com/downloads/lsi.zip" || exit 1
      unzip -o lsi.zip || exit 1
      chmod +x lsi.sh
    )
  fi

  # Any logical-drive line mentioning fail/degrad/error marks the array bad.
  STATUS=$("$MegaCli" -LDInfo -Lall -aALL -NoLog | grep -Ei 'fail|degrad|error')

  # On bad RAID status, post a Slack alert with one line per physical drive
  # (slot line joined with its state lines, "Firmware state:" label removed).
  if [ -n "$STATUS" ]; then
    touch /root/.bigscoots/counters/raidcheck
    MSG=$("$MegaCli" -PDlist -aALL -NoLog | grep -E 'Slot|state' | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' | sed 's/Firmware state://g')
    bash /bigscoots/general/slack.sh "#node-alerts" "Detected bad drive: $(hostname). \nOutput: \`\`\` $MSG \`\`\`"
    screen -dmS raidcheck sh -c 'sleep 300 ; rm -f /root/.bigscoots/counters/raidcheck'
  fi
fi
# Make sure pciutils is installed; lspci is used by the drive check below.
# `rpm -q` exits non-zero when the package is absent, whereas `rpm -qa <name>`
# exits 0 even when nothing matches, so the install would never trigger.
if ! rpm -q pciutils >/dev/null 2>&1; then
  yum -y -q install pciutils
fi
# Dead drive check for hosts without a MegaRAID controller (per lspci) and
# without /usr/StorMan/arcconf: runs smartctl against every /dev/sd[a-z] and
# alerts for each disk where smartctl exits non-zero. The deaddrivechk marker
# suppresses repeat alerts; a detached screen session deletes it after 300
# seconds.
if [ ! -f /root/.bigscoots/counters/deaddrivechk ] && ! lspci | grep -q MegaRAID && [ ! -f /usr/StorMan/arcconf ]; then
  # `rpm -q` exits non-zero when the package is absent; `rpm -qa <name>`
  # exits 0 even when nothing matches.
  if ! rpm -q smartmontools >/dev/null 2>&1; then
    yum -y install smartmontools
  fi

  for DISK in /dev/sd[a-z]; do
    # When nothing matches, the pattern is left as a literal string; skip it.
    [ -e "$DISK" ] || continue

    if ! smartctl_check=$(smartctl -a -d ata "$DISK"); then
      # Build the log path once so the alert names the file actually written.
      logfile="/root/.bigscoots/hardware/disk/${DISK##*/}.$(date +%s).log"
      # The log directory is not created anywhere else in this script.
      mkdir -p /root/.bigscoots/hardware/disk
      printf '%s\n' "$smartctl_check" > "$logfile"

      touch /root/.bigscoots/counters/deaddrivechk
      bash /bigscoots/general/slack.sh "#node-alerts" "Detected bad drive: $DISK in $(hostname). \nOutput available in: \`$logfile\`"
      screen -dmS deaddrivechk sh -c 'sleep 300 ; rm -f /root/.bigscoots/counters/deaddrivechk'
    fi
  done
fi