From af658521b46751a20a5953bd9c7f3ef01c0a74d7 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Sun, 6 Jan 2019 01:56:17 +0100 Subject: Added more checks to kaas and adei --- service/check_kaas.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'service/check_kaas.sh') diff --git a/service/check_kaas.sh b/service/check_kaas.sh index d6e7300..e69e73d 100755 --- a/service/check_kaas.sh +++ b/service/check_kaas.sh @@ -4,7 +4,9 @@ cd "$(dirname "$0")" . opts.sh e_nodes=$2 - +e_pods=2 +e_restarts=10 +p_pods=10 online=$(../scripts/ping.pl "$host") healthy=$online @@ -34,4 +36,24 @@ if [ $healthy -ne 0 ]; then fi fi +# Find pods in wrong states +if [ $healthy -ne 0 ]; then + pods=$(oc get pods --all-namespaces | awk '$6~/s|m/ { next } { print }' | grep -P "Terminating|Pending|CrashLoopBackOff" | wc -l) + if [ $pods -ge $e_pods ]; then + healthy=2 + echo "Pods stalled in wrong states:" + oc get pods --all-namespaces -o wide | awk '$6~/s|m/ { next } { print }' | grep -P "(Terminating|Pending|CrashLoopBackOff)" | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g' + fi +fi + +# Find not-ready running pods with large restart number (the e_restarts threshold is handed to awk with -v, because shell variables are not expanded inside the single-quoted awk program) +if [ $healthy -ne 0 ]; then + pods=$(oc get pods --all-namespaces | awk -v min="$e_restarts" '$6~/s|m/ { next } $5<min { next } $3~/^0/ { print $0 }' | grep Running | wc -l) + if [ $pods -ge $e_pods ]; then + healthy=2 + echo "Pods restarting:" + oc get pods --all-namespaces -o wide | awk -v min="$e_restarts" '$6~/s|m/ { next } $5<min { next } $3~/^0/ { print $0 }' | grep Running | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g' + fi +fi + echo "$online $healthy $version $nodes" -- cgit v1.2.3