From 0b61ab55596415bd1b463f52a4110f46897d2352 Mon Sep 17 00:00:00 2001 From: Eric Mountain Date: Mon, 14 Dec 2015 10:35:35 +0100 Subject: Fixes typo when setting facts to record whether master/node has been restarted already, to decide whether notify handler should do so or not. Currently, this causes random SDN network setup failures as openshift-node gets restarted while the setup script is running, and the subsequent start fails to configure the SDN because it thinks it's already done. Each fact is assigned a quoted Jinja2 expression so that it holds the evaluated result of the changed filter rather than the literal expression text. --- roles/openshift_master/tasks/main.yml | 6 +++--- roles/openshift_node/tasks/main.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'roles') diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 8995863ec..43647cc49 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -228,7 +228,7 @@ register: start_result - set_fact: - master_service_status_changed = start_result | changed + master_service_status_changed: "{{ start_result | changed }}" when: not openshift_master_ha | bool - name: Start and enable master api @@ -237,7 +237,7 @@ register: start_result - set_fact: - master_api_service_status_changed = start_result | changed + master_api_service_status_changed: "{{ start_result | changed }}" when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' - name: Start and enable master controller @@ -246,7 +246,7 @@ register: start_result - set_fact: - master_controllers_service_status_changed = start_result | changed + master_controllers_service_status_changed: "{{ start_result | changed }}" when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' - name: Install cluster packages diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index eef7bec9a..f8711d15a 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -131,4 +131,4 @@ register: start_result - set_fact: - 
node_service_status_changed = start_result | changed + node_service_status_changed: "{{ start_result | changed }}" -- cgit v1.2.3 From 979a61d4f5d56cc71059ee4e440856a45d14b92c Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Mon, 14 Dec 2015 15:17:23 -0500 Subject: Zabbix: added trigger to monitor app create over the last hour --- roles/os_zabbix/vars/template_openshift_master.yml | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'roles') diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 514d6fd24..a0ba8d104 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -269,6 +269,14 @@ g_template_openshift_master: - 'Openshift Master process not running on {HOST.NAME}' priority: avg + - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.sum(1h)}>3' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' + description: The application create loop has failed 4 or more times in the last hour + priority: avg + - name: 'Openshift Master API health check is failing on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -- cgit v1.2.3 From 072d4aacbe745cca7a768a60c13e65eba9d1fa48 Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Tue, 15 Dec 2015 10:59:48 -0400 Subject: Fix registry modification for new deployment types. New deployment types were missed when installing with additional docker registries. 
--- roles/openshift_node/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'roles') diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index eef7bec9a..b6150a00c 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -85,11 +85,11 @@ docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | union(['registry.access.redhat.com']) | difference(['']) }}" - when: openshift.common.deployment_type == 'enterprise' + when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - set_fact: docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | difference(['']) }}" - when: openshift.common.deployment_type != 'enterprise' + when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - name: Add personal registries lineinfile: -- cgit v1.2.3 From c13034c800796b7abef3f8ec0e5dd01919ae493c Mon Sep 17 00:00:00 2001 From: Joel Diaz Date: Tue, 15 Dec 2015 15:00:41 -0500 Subject: Add zabbix values to track docker container DNS results --- roles/os_zabbix/vars/template_docker.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'roles') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index bfabf50c5..91a2c400e 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -12,6 +12,11 @@ g_template_docker: - Docker Daemon value_type: int + - key: docker.container.dns.resolution + applications: + - Docker Daemon + value_type: int + - key: docker.storage.is_loopback applications: - Docker Storage @@ -62,6 +67,11 @@ g_template_docker: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high + - name: 'docker.container.dns.resolution failed on {HOST.NAME}' + 
expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc' + priority: high + - name: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' -- cgit v1.2.3 From 68b0a1b5b6b0986d3a224014ef670aeba3e41716 Mon Sep 17 00:00:00 2001 From: Thomas Wiest Date: Wed, 16 Dec 2015 15:00:51 -0500 Subject: split zagg.server.processor.errors into separate heartbeat and metrics error items (needed since the scripts are split now). --- roles/os_zabbix/vars/template_zagg_server.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'roles') diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml index 0e8e53bb7..db5665993 100644 --- a/roles/os_zabbix/vars/template_zagg_server.yml +++ b/roles/os_zabbix/vars/template_zagg_server.yml @@ -7,7 +7,12 @@ g_template_zagg_server: - Zagg Server value_type: int - - key: zagg.server.processor.errors + - key: zagg.server.metrics.errors + applications: + - Zagg Server + value_type: int + + - key: zagg.server.heartbeat.errors applications: - Zagg Server value_type: int @@ -18,8 +23,13 @@ g_template_zagg_server: value_type: int ztriggers: - - name: 'Error sending metrics on {HOST.NAME}' - expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0' + - name: 'Error processing metrics on {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' + priority: average + + - name: 'Error processing heartbeats on {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0' url: 
'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' priority: average -- cgit v1.2.3 From 857f0be27dcb1a7da0f0e8596c9b08c4a424e829 Mon Sep 17 00:00:00 2001 From: Joel Diaz Date: Wed, 16 Dec 2015 15:20:31 -0500 Subject: change ovs mount to /var/run/openvswitch will not require a container restart if openvswitch service is restarted --- roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'roles') diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 index 978e40b88..bcc8a5e03 100644 --- a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 +++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 @@ -42,7 +42,7 @@ ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }} -v /etc/localtime:/etc/localtime \ -v /run/pcp:/run/pcp \ -v /var/run/docker.sock:/var/run/docker.sock \ - -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \ + -v /var/run/openvswitch:/var/run/openvswitch \ {% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %} -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \ -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \ -- cgit v1.2.3