summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.tito/packages/openshift-ansible2
-rw-r--r--openshift-ansible.spec23
-rw-r--r--playbooks/common/openshift-master/scaleup.yml3
-rw-r--r--playbooks/common/openshift-node/config.yml3
-rw-r--r--roles/docker/tasks/main.yml8
-rw-r--r--roles/etcd/tasks/main.yml6
-rw-r--r--roles/openshift_facts/tasks/main.yml3
-rw-r--r--roles/openshift_master/handlers/main.yml3
-rw-r--r--roles/openshift_master/tasks/main.yml3
-rw-r--r--roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j22
-rw-r--r--roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j22
-rw-r--r--roles/openshift_master/templates/docker/master.docker.service.j22
-rw-r--r--roles/openshift_node/tasks/main.yml3
-rw-r--r--roles/openshift_node/templates/openshift.docker.node.service2
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml117
-rw-r--r--roles/os_zabbix/vars/template_openshift_node.yml12
16 files changed, 129 insertions, 65 deletions
diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible
index e6a0b6e80..59e9b8458 100644
--- a/.tito/packages/openshift-ansible
+++ b/.tito/packages/openshift-ansible
@@ -1 +1 @@
-3.0.58-1 ./
+3.0.60-1 ./
diff --git a/openshift-ansible.spec b/openshift-ansible.spec
index bad7e68b2..c88b0b790 100644
--- a/openshift-ansible.spec
+++ b/openshift-ansible.spec
@@ -5,7 +5,7 @@
}
Name: openshift-ansible
-Version: 3.0.58
+Version: 3.0.60
Release: 1%{?dist}
Summary: Openshift and Atomic Enterprise Ansible
License: ASL 2.0
@@ -279,6 +279,27 @@ Atomic OpenShift Utilities includes
%changelog
+* Wed Mar 16 2016 Brenton Leanhardt <bleanhar@redhat.com> 3.0.60-1
+- Fall back to deployment_type in openshift_facts. (abutcher@redhat.com)
+- Fixing undefined variable check (kwoodson@redhat.com)
+- Fix path to cacert on /healthz/ready check (sdodson@redhat.com)
+- Load environment files in containerized installs (sdodson@redhat.com)
+- change type to value_type (zhizhang@zhizhang-laptop-nay.redhat.com)
+- change time from int to float (zhizhang@zhizhang-laptop-nay.redhat.com)
+- change the check time from 1 hour to 2 hour (zhizhang@zhizhang-laptop-
+ nay.redhat.com)
+- add item of time cost a app build and app create (zhizhang@zhizhang-laptop-
+ nay.redhat.com)
+- add trigger for app creation with build process (zhizhang@zhizhang-laptop-
+ nay.redhat.com)
+- add key of openshift.master.app.build.create (zhizhang@zhizhang-laptop-
+ nay.redhat.com)
+
+* Wed Mar 16 2016 Brenton Leanhardt <bleanhar@redhat.com> 3.0.59-1
+- Only mask etcd service for containerized installls when it's installed
+ (sdodson@redhat.com)
+- Provide cacert when performing health checks (abutcher@redhat.com)
+
* Tue Mar 15 2016 Kenny Woodson <kwoodson@redhat.com> 3.0.58-1
- Group selector feature added (kwoodson@redhat.com)
- nfs: replace yum with dnf (efreiber@redhat.com)
diff --git a/playbooks/common/openshift-master/scaleup.yml b/playbooks/common/openshift-master/scaleup.yml
index ccb1d23f1..6e6cb3e01 100644
--- a/playbooks/common/openshift-master/scaleup.yml
+++ b/playbooks/common/openshift-master/scaleup.yml
@@ -33,7 +33,8 @@
service: name={{ openshift.common.service_type }}-master-controllers state=restarted
- name: verify api server
command: >
- curl -k --silent {{ openshift.master.api_url }}/healthz/ready
+ curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt
+ {{ openshift.master.api_url }}/healthz/ready
register: api_available_output
until: api_available_output.stdout == 'ok'
retries: 120
diff --git a/playbooks/common/openshift-node/config.yml b/playbooks/common/openshift-node/config.yml
index 1116a8178..7ca941732 100644
--- a/playbooks/common/openshift-node/config.yml
+++ b/playbooks/common/openshift-node/config.yml
@@ -266,7 +266,8 @@
# Using curl here since the uri module requires python-httplib2 and
# wait_for port doesn't provide health information.
command: >
- curl -k --silent {{ openshift.master.api_url }}/healthz/ready
+ curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt
+ {{ openshift.master.api_url }}/healthz/ready
register: api_available_output
until: api_available_output.stdout == 'ok'
retries: 120
diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml
index d634996fb..9709c5014 100644
--- a/roles/docker/tasks/main.yml
+++ b/roles/docker/tasks/main.yml
@@ -49,16 +49,16 @@
dest: /etc/sysconfig/docker
regexp: '^{{ item.reg_conf_var }}=.*$'
line: "{{ item.reg_conf_var }}='{{ item.reg_fact_val | oo_prepend_strings_in_list(item.reg_flag ~ ' ') | join(' ') }}'"
- when: item.reg_fact_val is defined and docker_check.stat.isreg
+ when: item.reg_fact_val != '' and docker_check.stat.isreg
with_items:
- reg_conf_var: ADD_REGISTRY
- reg_fact_val: "{{ docker_additional_registries }}"
+ reg_fact_val: "{{ docker_additional_registries | default(None, true)}}"
reg_flag: --add-registry
- reg_conf_var: BLOCK_REGISTRY
- reg_fact_val: "{{ docker_blocked_registries }}"
+ reg_fact_val: "{{ docker_blocked_registries| default(None, true) }}"
reg_flag: --block-registry
- reg_conf_var: INSECURE_REGISTRY
- reg_fact_val: "{{ docker_insecure_registries }}"
+ reg_fact_val: "{{ docker_insecure_registries| default(None, true) }}"
reg_flag: --insecure-registry
notify:
- restart docker
diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml
index d6956de71..064544b03 100644
--- a/roles/etcd/tasks/main.yml
+++ b/roles/etcd/tasks/main.yml
@@ -36,8 +36,12 @@
state: stopped
enabled: no
+- name: Check for etcd service presence
+ command: systemctl show etcd.service
+ register: etcd_show
+
- name: Mask system etcd when containerized
- when: openshift.common.is_containerized | bool
+ when: openshift.common.is_containerized | bool and 'LoadState=not-found' not in etcd_show.stdout
command: systemctl mask etcd
- name: Reload systemd units
diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml
index 89ddb0d98..50e7e5747 100644
--- a/roles/openshift_facts/tasks/main.yml
+++ b/roles/openshift_facts/tasks/main.yml
@@ -25,7 +25,8 @@
openshift_facts:
role: common
local_facts:
- deployment_type: "{{ openshift_deployment_type }}"
+ # TODO: Deprecate deployment_type in favor of openshift_deployment_type
+ deployment_type: "{{ openshift_deployment_type | default(deployment_type) }}"
cluster_id: "{{ openshift_cluster_id | default('default') }}"
hostname: "{{ openshift_hostname | default(None) }}"
ip: "{{ openshift_ip | default(None) }}"
diff --git a/roles/openshift_master/handlers/main.yml b/roles/openshift_master/handlers/main.yml
index 4d1216aae..e5b9e4977 100644
--- a/roles/openshift_master/handlers/main.yml
+++ b/roles/openshift_master/handlers/main.yml
@@ -17,7 +17,8 @@
# Using curl here since the uri module requires python-httplib2 and
# wait_for port doesn't provide health information.
command: >
- curl -k --silent {{ openshift.master.api_url }}/healthz/ready
+ curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt
+ {{ openshift.master.api_url }}/healthz/ready
register: api_available_output
until: api_available_output.stdout == 'ok'
retries: 120
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index 1f499dc93..9c3d09d09 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -282,7 +282,8 @@
# Using curl here since the uri module requires python-httplib2 and
# wait_for port doesn't provide health information.
command: >
- curl -k --silent {{ openshift.master.api_url }}/healthz/ready
+ curl --silent --cacert {{ openshift.common.config_base }}/master/ca.crt
+ {{ openshift.master.api_url }}/healthz/ready
register: api_available_output
until: api_available_output.stdout == 'ok'
retries: 120
diff --git a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2 b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2
index f777f7657..6a21a04ab 100644
--- a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2
+++ b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-api.service.j2
@@ -12,7 +12,7 @@ Requires=docker.service
EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-api
Environment=GOTRACEBACK=crash
ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type}}-master-api
-ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-api -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master api --config=${CONFIG_FILE} $OPTIONS
+ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-api --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master-api -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master api --config=${CONFIG_FILE} $OPTIONS
ExecStartPost=/usr/bin/sleep 10
ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master-api
LimitNOFILE=131072
diff --git a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2 b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2
index 4fc48ba15..69f68d843 100644
--- a/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2
+++ b/roles/openshift_master/templates/docker-cluster/atomic-openshift-master-controllers.service.j2
@@ -11,7 +11,7 @@ PartOf=docker.service
EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
Environment=GOTRACEBACK=crash
ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type}}-master-controllers
-ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-controllers -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master controllers --config=${CONFIG_FILE} $OPTIONS
+ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master-controllers --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master-controllers -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master controllers --config=${CONFIG_FILE} $OPTIONS
ExecStartPost=/usr/bin/sleep 10
ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master-controllers
LimitNOFILE=131072
diff --git a/roles/openshift_master/templates/docker/master.docker.service.j2 b/roles/openshift_master/templates/docker/master.docker.service.j2
index e9f4a4d21..b714fdeb7 100644
--- a/roles/openshift_master/templates/docker/master.docker.service.j2
+++ b/roles/openshift_master/templates/docker/master.docker.service.j2
@@ -8,7 +8,7 @@ Wants=etcd_container.service
[Service]
EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master
ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-master
-ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS
+ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS
ExecStartPost=/usr/bin/sleep 10
ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master
Restart=always
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index 81efd2cec..4b5832ab7 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -125,7 +125,8 @@
# Using curl here since the uri module requires python-httplib2 and
# wait_for port doesn't provide health information.
command: >
- curl -k --silent {{ openshift_node_master_api_url }}/healthz/ready
+ curl --silent --cacert {{ openshift.common.config_base }}/node/ca.crt
+ {{ openshift_node_master_api_url }}/healthz/ready
register: api_available_output
until: api_available_output.stdout == 'ok'
retries: 120
diff --git a/roles/openshift_node/templates/openshift.docker.node.service b/roles/openshift_node/templates/openshift.docker.node.service
index fa14cd770..53b1d6230 100644
--- a/roles/openshift_node/templates/openshift.docker.node.service
+++ b/roles/openshift_node/templates/openshift.docker.node.service
@@ -12,7 +12,7 @@ Wants={{ openshift.common.service_type }}-master.service
[Service]
EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node
ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-node
-ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:ro -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log {{ openshift.node.node_image }}:${IMAGE_VERSION}
+ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-node -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:ro -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log {{ openshift.node.node_image }}:${IMAGE_VERSION}
ExecStartPost=/usr/bin/sleep 10
ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-node
SyslogIdentifier={{ openshift.common.service_type }}-node
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index e36f23a2b..705066b35 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -6,258 +6,276 @@ g_template_openshift_master:
applications:
- Openshift Master
key: openshift.master.app.create
+
+ - key: openshift.master.app.build.create
+ description: "check the app create with a build process"
+ value_type: int
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.app.create.time
+ description: "check the time it takes app create with a build process"
+ value_type: float
+ applications:
+ - Openshift Master
+
+ - key: openshift.master.app.build.time
+ description: "check the time it takes app build"
+ value_type: float
+ applications:
+ - Openshift Master
- key: openshift.master.process.count
description: Shows number of master processes running
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.api.ping
description: "Verify that the Openshift API is up (uses the cluster API URL)"
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.local.api.ping
description: "Verify that the Openshift API is up on the host (uses the API URL as the https://127.0.0.1)"
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.api.healthz
description: "Checks the healthz check of the master's api: https://<cluster_api_url>/healthz"
- type: int
+ value_type: int
data_type: bool
applications:
- Openshift Master
- key: openshift.master.local.api.healthz
description: "Checks the healthz check of the master's api: https://127.0.0.1/healthz"
- type: int
+ value_type: int
data_type: bool
applications:
- Openshift Master
- key: openshift.master.user.count
description: Shows number of users in a cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pod.running.count
description: Shows number of pods running
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pod.user.running.count
description: Shows number of user pods running (non infrastructure pods)
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pod.total.count
description: Shows total number of pods (running and non running)
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.node.count
description: Shows the total number of nodes found in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.project.count
description: Shows number of projects on a cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pv.total.count
description: Total number of Persistent Volumes in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pv.available.count
description: Total number of Available Persistent Volumes in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pv.released.count
description: Total number of Released Persistent Volumes in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pv.bound.count
description: Total number of Bound Persistent Volumes in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.pv.failed.count
description: Total number of Failed Persistent Volumes in the Openshift Cluster
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.skydns.port.open
description: State of the SkyDNS port open and listening
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.skydns.query
description: SkyDNS can be queried or not
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.etcd.create.success
description: Show number of successful create actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.create.fail
description: Show number of failed create actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.delete.success
description: Show number of successful delete actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.delete.fail
description: Show number of failed delete actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.get.success
description: Show number of successful get actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.get.fail
description: Show number of failed get actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.set.success
description: Show number of successful set actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.set.fail
description: Show number of failed set actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.update.success
description: Show number of successful update actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.update.fail
description: Show number of failed update actions
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.watchers
description: Show number of etcd watchers
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.etcd.ping
description: etcd ping
- type: int
+ value_type: int
applications:
- Openshift Etcd
- key: openshift.master.metric.ping
description: "This check verifies that the https://master/metrics check is alive and communicating properly."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.nodesnotready.count
description: "This check shows how many nodes in a cluster are in NotReady state."
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.nodesnotschedulable.count
description: "This check shows how many nodes in a cluster are not schedulable."
- type: int
+ value_type: int
applications:
- Openshift Master
- key: openshift.master.apiserver.latency.summary.pods.quantile.list.5
description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.apiserver.latency.summary.pods.quantile.list.9
description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.apiserver.latency.summary.pods.quantile.list.99
description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5
description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9
description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99
description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5
description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9
description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
- key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99
description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed."
- type: int
+ value_type: int
applications:
- Openshift Master Metrics
@@ -295,6 +313,13 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation with build has failed on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.app.build.create.last(#1)}=1 and {Template Openshift Master:openshift.master.app.build.create.last(#2)}=1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ priority: avg
+
- name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.app.create.sum(1h)}>3'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
@@ -303,6 +328,14 @@ g_template_openshift_master:
description: The application create loop has failed 4 or more times in the last hour
priority: avg
+ - name: 'Application with build creation has failed multiple times in the last 2 hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.app.build.create.sum(2h)}>3'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml
index 66bd3a147..9f84a2cdf 100644
--- a/roles/os_zabbix/vars/template_openshift_node.yml
+++ b/roles/os_zabbix/vars/template_openshift_node.yml
@@ -4,37 +4,37 @@ g_template_openshift_node:
zitems:
- key: openshift.node.process.count
description: Shows number of OpenShift Node processes running
- type: int
+ value_type: int
applications:
- Openshift Node
- key: openshift.node.ovs.pids.count
description: Shows number of ovs process ids running
- type: int
+ value_type: int
applications:
- Openshift Node
- key: openshift.node.ovs.ports.count
description: Shows number of OVS ports defined
- type: int
+ value_type: int
applications:
- Openshift Node
- key: openshift.node.ovs.stray.rules
description: Number of OVS stray rules found/removed
- type: int
+ value_type: int
applications:
- Openshift Node
- key: openshift.node.registry-pods.healthy_pct
description: Shows the percentage of healthy registries in the cluster
- type: int
+ value_type: int
applications:
- Openshift Node
- key: openshift.node.registry.service.ping
description: Ping docker-registry service from node
- type: int
+ value_type: int
applications:
- Openshift Node