diff options
Diffstat (limited to 'service/check_kaas.sh')
-rwxr-xr-x | service/check_kaas.sh | 24 |
1 files changed, 23 insertions, 1 deletions
diff --git a/service/check_kaas.sh b/service/check_kaas.sh
index d6e7300..e69e73d 100755
--- a/service/check_kaas.sh
+++ b/service/check_kaas.sh
@@ -4,7 +4,9 @@ cd "$(dirname "$0")"
 . opts.sh
 
 e_nodes=$2
-
+e_pods=2        # a check fails once at least this many pods match it
+e_restarts=10   # minimum restart count for a not-ready Running pod to be counted
+p_pods=10       # at most this many offending pods are printed per check
 online=$(../scripts/ping.pl "$host")
 healthy=$online
 
@@ -34,4 +36,24 @@ if [ $healthy -ne 0 ]; then
     fi
 fi
 
+# Find pods in wrong states (Terminating, Pending, CrashLoopBackOff); rows whose 6th column (presumably AGE) contains "s" or "m" are skipped
+if [ $healthy -ne 0 ]; then
+    pods=$(oc get pods --all-namespaces | awk '$6~/s|m/ { next } { print }' | grep -P "Terminating|Pending|CrashLoopBackOff" | wc -l)
+    if [ $pods -ge $e_pods ]; then
+        healthy=2
+        echo "Pods stalled in wrong states:"
+        oc get pods --all-namespaces -o wide | awk '$6~/s|m/ { next } { print }' | grep -P "(Terminating|Pending|CrashLoopBackOff)" | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g'
+    fi
+fi
+
+# Find not-ready running pods with large restart number; e_restarts is handed to awk with -v because shell variables do not expand inside the single-quoted program
+if [ $healthy -ne 0 ]; then
+    pods=$(oc get pods --all-namespaces | awk -v e_restarts="$e_restarts" '$6~/s|m/ { next } $5<e_restarts { next } $3~/^0/ { print $0 }' | grep Running | wc -l)
+    if [ $pods -ge $e_pods ]; then
+        healthy=2
+        echo "Pods restarting:"
+        oc get pods --all-namespaces -o wide | awk -v e_restarts="$e_restarts" '$6~/s|m/ { next } $5<e_restarts { next } $3~/^0/ { print $0 }' | grep Running | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g'
+    fi
+fi
+
 echo "$online $healthy $version $nodes"