summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
authorMatt Woodson <mwoodson@gmail.com>2015-11-19 13:42:33 -0500
committerMatt Woodson <mwoodson@gmail.com>2015-11-19 13:42:33 -0500
commit056364ede0d4098518f639d41c01c8d340a98227 (patch)
treeb6b0c7191141ec76758e6244d8a2bb4b418db34a /roles
parentf8af5f31d7867ff0835dffd585b4e00e7cb26561 (diff)
parent60b678aa338694d8eabeeab3a5f51e79a97ddaee (diff)
downloadopenshift-056364ede0d4098518f639d41c01c8d340a98227.tar.gz
openshift-056364ede0d4098518f639d41c01c8d340a98227.tar.bz2
openshift-056364ede0d4098518f639d41c01c8d340a98227.tar.xz
openshift-056364ede0d4098518f639d41c01c8d340a98227.zip
Merge pull request #939 from mwoodson/master_checks
added metric quantile items to zabbix
Diffstat (limited to 'roles')
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml82
1 files changed, 82 insertions, 0 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 174486e15..512adad4c 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -13,6 +13,12 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.api.ping
+ description: "Verify that the Openshift API is up"
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.master.api.healthz
description: "Checks the healthz check of the master's api: https://master_host/healthz"
type: int
@@ -44,6 +50,12 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.node.count
+ description: Shows the total number of nodes found in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.project.count
description: Shows number of projects on a cluster
type: int
@@ -122,6 +134,66 @@ g_template_openshift_master:
applications:
- Openshift Etcd
+ - key: openshift.master.metric.ping
+ description: "This check verifies that the https://master/metrics check is alive and communicating properly."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -133,6 +205,16 @@ g_template_openshift_master:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: high
+ - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ priority: high
+
+ - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ priority: avg
+
- name: 'Openshift Master process not running on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'