summaryrefslogtreecommitdiffstats
path: root/roles/ands_monitor/templates/scripts
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@suren.me>2019-10-06 05:00:55 +0200
committerSuren A. Chilingaryan <csa@suren.me>2019-10-06 05:00:55 +0200
commitba144fab071258a97cf3c42a0defeb0aae41a353 (patch)
tree2e738d4e4774d754b56d79021cc8781b3c0835a5 /roles/ands_monitor/templates/scripts
parentefe4b9bbe3c9cb950378de9697eed2030ac49ca2 (diff)
downloadands-ba144fab071258a97cf3c42a0defeb0aae41a353.tar.gz
ands-ba144fab071258a97cf3c42a0defeb0aae41a353.tar.bz2
ands-ba144fab071258a97cf3c42a0defeb0aae41a353.tar.xz
ands-ba144fab071258a97cf3c42a0defeb0aae41a353.zip
Document latest problems with docker images and resource reclamation, add docker performance checks in the monitoring scripts, helpers to filter the logs
Diffstat (limited to 'roles/ands_monitor/templates/scripts')
-rwxr-xr-x roles/ands_monitor/templates/scripts/check_server_status.sh.j2 | 29
-rwxr-xr-x roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 | 2
2 files changed, 29 insertions, 2 deletions
diff --git a/roles/ands_monitor/templates/scripts/check_server_status.sh.j2 b/roles/ands_monitor/templates/scripts/check_server_status.sh.j2
index 0bef13c..c2849f4 100755
--- a/roles/ands_monitor/templates/scripts/check_server_status.sh.j2
+++ b/roles/ands_monitor/templates/scripts/check_server_status.sh.j2
@@ -3,7 +3,7 @@
fs=`df -lm / | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4`
datafs=`df -lm /mnt/ands | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4`
mem=`free -g | grep "Mem" | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 7`
-cpu=`uptime | sed -e "s/[[:space:]]/\n/g" | tail -n 1`
+cpu=`uptime | sed -e "s/[[:space:]]/\n/g" -e s/,/./g | tail -n 1`
if [ $fs -le 8192 ]; then
echo "Only $(($fs / 1024)) GB left in the root file system"
@@ -53,3 +53,30 @@ ping -c 1 -W 2 8.8.8.8 &> /dev/null
if [ $? -ne 0 ]; then
echo "Networkign problems, can't ping Google's public DNS server"
fi
+
+info=$(LC_ALL=C docker info)
+if [ -n "$info" ]; then
+ images=$(echo "$info" | grep -i images | grep -Po "\d+")
+ [ -n "$images" ] && images=$(docker images -a | wc -l)
+ c=$(echo "$info" | grep -i containers | grep -Po "\d+")
+ c_running=$(echo "$info" | grep -i containers -A 5 | grep -i running | grep -Po "\d+")
+ c_paused=$(echo "$info" | grep -i containers -A 5 | grep -i paused | grep -Po "\d+")
+ c_stopped=$(echo "$info" | grep -i containers -A 5 | grep -i stopped | grep -Po "\d+")
+
+ data_space=$(echo "$info" | grep -i "\bData Space Available" | grep -Po "[\d.]+\s+\w+")
+ data_size=$(echo "$data_space" | grep -Po "[\d.]+")
+ [ -n "$(echo $data_space | grep -P 'TB')" ] && data_size=$(echo "$data_size * 1024" | bc)
+ [ -z "$(echo $data_space | grep '[TG]B')" ] && data_size=0
+
+ metadata_space=$(echo "$info" | grep -i "\bMetadata Space Available" | grep -Po "[\d.]+\s+\w+")
+ metadata_size=$(echo "$metadata_space" | grep -Po "[\d.]+")
+ [ -n "$(echo $metadata_space | grep -P 'TB')" ] && metadata_size=$(echo "$metadata_size * 1024" | bc)
+ [ -z "$(echo $metadata_space | grep '[TG]B')" ] && metadata_size=0
+
+ [ $(echo "$data_size > 300" | bc) -eq 0 ] && echo "Docker Data Space is critically low ($data_space)"
+ [ $(echo "$metadata_size > 5" | bc) -eq 0 ] && echo "Docker Metadata Space is critically low ($metadata_space)"
+else
+ images=$(docker images -a | wc -l)
+ echo "docker info has timed out"
+fi
+[ "$images" -gt 1000 ] && echo "Too many docker images ($images) will cause severe scheduling penalties"
diff --git a/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 b/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2
index 7acac5f..df65c50 100755
--- a/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2
+++ b/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2
@@ -12,4 +12,4 @@ disks=$(/opt/MegaRAID/storcli/storcli64 /c0 show | grep -P "(HDD|SSD)" | grep "O
data=`df -lh /mnt/ands | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4`
#echo -n "1 Up $up \${color gray}/ $disks disks, $data free, load: $load, pods: $pods"
-echo -en "1 $up\${color gray}, ${disks}/${data}, $(printf %3u ${containers}) c - $(printf %4.1f ${load}), $(printf %3u ${mem}) GB, $(printf %4u ${iops}) IOPS, $(printf %3u ${net}) MB/s"
+echo -en "1 $up\${color gray}, ${disks}/${data}, $(printf %3u ${containers}) c, $(printf %4.1f ${load})%, $(printf %3u ${mem}) GB, $(printf %4u ${iops}) IOPS, $(printf %3u ${net}) MB/s"