diff options
Diffstat (limited to 'roles/os_zabbix/vars')
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 82 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 21 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_performance_copilot.yml | 14 |
3 files changed, 117 insertions, 0 deletions
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 1de4fefbb..6defc4989 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -31,6 +31,78 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.etcd.create.success + description: Show number of successful create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.create.fail + description: Show number of failed create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.success + description: Show number of successful delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.fail + description: Show number of failed delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.success + description: Show number of successful get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.fail + description: Show number of failed get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.success + description: Show number of successful set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.fail + description: Show number of failed set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.success + description: Show number of successful update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.fail + description: Show number of failed update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.watchers + description: Show number of etcd watchers + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.ping + description: etcd ping + type: int + applications: + - Openshift Etcd + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -56,3 +128,13 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.project.counter.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info + + - name: 'Low number of etcd watchers on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: avg + + - name: 'Etcd ping failed on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: high diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index aeeec4b8d..fbc20cd63 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -194,6 +194,11 @@ g_template_os_linux: lifetime: 1 description: "Dynamically register the filesystems" + - name: disc.disk + key: disc.disk + lifetime: 1 + description: "Dynamically register disks on a node" + zitemprototypes: - discoveryrule_key: disc.filesys name: "disc.filesys.full.{#OSO_FILESYS}" @@ -211,6 +216,22 @@ g_template_os_linux: applications: - Disk + - discoveryrule_key: disc.disk + name: "TPS (IOPS) for disk {#OSO_DISK}" + key: "disc.disk.tps[{#OSO_DISK}]" + value_type: int + description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using" + applications: + - Disk + + - discoveryrule_key: disc.disk + name: "Percent Utilized for disk {#OSO_DISK}" + key: "disc.disk.putil[{#OSO_DISK}]" + value_type: float + description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command" + applications: + - Disk + ztriggerprototypes: - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml new file mode 100644 index 000000000..b62fa0228 --- /dev/null +++ b/roles/os_zabbix/vars/template_performance_copilot.yml @@ -0,0 +1,14 @@ +--- +g_template_performance_copilot: + name: Template Performance Copilot + zitems: + - key: pcp.ping + applications: + - Performance Copilot + value_type: int + + ztriggers: + - name: 'pcp.ping failed on {HOST.NAME}' + expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc' + priority: average |