blob: b43acc036757ce150ff0f3b7612185d531ca8ba8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#! /bin/bash
# Run from the script's own directory so that the relative paths used below
# (opts.sh, ../scripts/ping.pl) resolve regardless of the caller's cwd.
cd "$(dirname "$0")"
source opts.sh   # NOTE(review): presumably provides $host - confirm in opts.sh

# Minimum number of Ready nodes, taken from the second positional argument.
e_nodes=$2

# Thresholds for the pod checks:
#   e_pods     - a pod check reports a problem once this many pods match
#   e_restarts - restart count used by the "restarting pods" check
#   p_pods     - maximum number of offending pods listed per check
e_pods=2 e_restarts=10 p_pods=10

# The ping result seeds the overall health value; later checks overwrite
# $healthy with 0 or 2.
online=$(../scripts/ping.pl "$host")
healthy=$online
# Take the second whitespace-separated field of the first line printed by
# `oc version`. When nothing comes back, healthy is set to 0, which makes the
# later "healthy -ne 0" checks skip themselves.
version=$(oc version | awk 'NR == 1 { print $2; exit }')
if [ -n "$version" ]; then
    version="OpenShift $version"
else
    healthy=0
fi
# Count the etcd lines of `oc get cs` that contain "Healthy". Fewer than three
# sets healthy=2 and prints the matching lines as "* <field1> <field2>".
etcd=$(oc get cs | grep etcd | grep -c "Healthy")
if [ -z "$etcd" ] || [ "$etcd" -lt 3 ]; then
    healthy=2
    # NOTE(review): this lists the members that ARE Healthy, not the failing
    # ones - confirm that this is the intended diagnostic output.
    oc get cs \
        | grep etcd \
        | grep "Healthy" \
        | sed -r -e 's/\s+/ /g' \
        | awk '{ print $1, $2 }' \
        | sed 's/^/* /'
fi
# Node readiness check.
#
# Compares the number of Ready nodes with the expected minimum and builds the
# $nodes fragment of the final status line.
#
# Globals read:    healthy, e_nodes, etcd
# Globals written: nodes, offline, healthy (set to 2 when too few nodes are Ready)
# Output:          one "* NAME STATUS" line per node that is not Ready
check_nodes() {
    local node_list not_ready

    # Nothing to do when an earlier check set healthy to 0 (or left it empty).
    [ "${healthy:-0}" -ne 0 ] || return 0

    # One snapshot, so that the count and the listing always agree.
    node_list=$(oc get nodes)

    # Match on the STATUS field itself: a plain `grep Ready` also matches
    # "NotReady", which made NotReady nodes count as ready. A status such as
    # "Ready,SchedulingDisabled" still counts as ready, as it did before.
    nodes=$(printf '%s\n' "$node_list" \
        | awk '$2 == "Ready" || $2 ~ /^Ready,/ { n++ } END { print n + 0 }')

    if [ "$nodes" -ge "$e_nodes" ]; then
        nodes=" / \${color gray}$etcd etcd, $nodes nodes"
    else
        # Every non-header row whose STATUS is not Ready (NotReady included).
        not_ready=$(printf '%s\n' "$node_list" \
            | awk 'NF && $2 != "STATUS" && $2 != "Ready" && $2 !~ /^Ready,/ { print $1, $2 }')
        offline=$(printf '%s\n' "$not_ready" | awk 'NF { n++ } END { print n + 0 }')
        nodes=" / \${color gray}$etcd etcd, $nodes ready, $offline offline"
        healthy=2
        if [ -n "$not_ready" ]; then
            printf '%s\n' "$not_ready" | sed 's/^/* /'
        fi
    fi
}
check_nodes
# Find pods in unexpected states
# Lines whose sixth field contains "s" or "m" are ignored (presumably the AGE
# column, i.e. pods that are only seconds or minutes old - confirm against the
# `oc get pods --all-namespaces` column layout). When at least $e_pods lines
# mention Terminating or Pending, healthy becomes 2 and up to $p_pods of them
# are printed with whitespace runs squeezed to a single space.
if [ "$healthy" -ne 0 ]; then
    pods=$(
        oc get pods --all-namespaces \
            | awk '$6 !~ /s|m/' \
            | grep -c -P "Terminating|Pending"
    )
    if [ "$pods" -ge "$e_pods" ]; then
        healthy=2
        echo "Pods stalled in wrong states:"
        oc get pods --all-namespaces -o wide \
            | awk '$6 !~ /s|m/' \
            | grep -P "(Terminating|Pending)" \
            | head -n "$p_pods" \
            | sed -e 's/[[:space:]]\+/ /g'
    fi
fi
# Find not-ready running pods with large restart number
#
# A pod line qualifies when its sixth field contains neither "s" nor "m",
# its fifth field (restart count) is at least $e_restarts, its third field
# (ready count) starts with "0", and the line mentions "Running".
#
# Globals read:    healthy, e_restarts, e_pods, p_pods
# Globals written: pods (number of qualifying pods), healthy (set to 2 when
#                  at least $e_pods pods qualify)
# Output:          a "Pods restarting:" header followed by up to $p_pods
#                  qualifying lines, whitespace runs squeezed to one space
check_restarting_pods() {
    local restarting

    # Nothing to do when an earlier check set healthy to 0 (or left it empty).
    [ "${healthy:-0}" -ne 0 ] || return 0

    # The threshold is handed to awk with -v. Written as $e_restarts inside the
    # single-quoted program it is an awk field reference (an unset awk variable,
    # hence $0), so the restart count was compared with the whole line as a
    # string and the filter dropped practically every pod.
    # One -o wide snapshot feeds both the count and the listing.
    restarting=$(
        oc get pods --all-namespaces -o wide \
            | awk -v min_restarts="$e_restarts" '
                $6 ~ /s|m/                { next }
                $5 + 0 < min_restarts + 0 { next }
                $3 ~ /^0/                 { print $0 }' \
            | grep Running
    )
    pods=$(printf '%s\n' "$restarting" | awk 'NF { n++ } END { print n + 0 }')

    if [ "$pods" -ge "$e_pods" ]; then
        healthy=2
        echo "Pods restarting:"
        if [ -n "$restarting" ]; then
            printf '%s\n' "$restarting" \
                | head -n "$p_pods" \
                | sed -e 's/[[:space:]]\+/ /g'
        fi
    fi
}
check_restarting_pods
# Find own pods in error states
# Only lines matching adei, adai or bora are considered, and lines whose sixth
# field contains "s" or "m" are ignored. When at least $e_pods of the remaining
# lines mention CrashLoopBackOff or Error, healthy becomes 2 and up to $p_pods
# of them are printed with whitespace runs squeezed to a single space.
if [ "$healthy" -ne 0 ]; then
    pods=$(
        oc get pods --all-namespaces \
            | grep -P "adei|adai|bora" \
            | awk '$6 !~ /s|m/' \
            | grep -c -P "CrashLoopBackOff|Error"
    )
    if [ "$pods" -ge "$e_pods" ]; then
        healthy=2
        echo "Pods with errors:"
        oc get pods --all-namespaces -o wide \
            | grep -P "adei|adai|bora" \
            | awk '$6 !~ /s|m/' \
            | grep -P "(CrashLoopBackOff|Error)" \
            | head -n "$p_pods" \
            | sed -e 's/[[:space:]]\+/ /g'
    fi
fi
# Summary line: ping result, health value, version string and node fragment,
# separated by single spaces.
printf '%s %s %s %s\n' "$online" "$healthy" "$version" "$nodes"
|