summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Wolinetz <ewolinet@redhat.com>2018-01-15 12:18:08 -0600
committerEric Wolinetz <ewolinet@redhat.com>2018-01-24 16:27:31 -0600
commit83378c05f3aaa67e36a3e7577784351fdb711c57 (patch)
tree8b388050fe3eee591772efb61e08bba5ca47cd1e
parentf34f986bf3ab0523ce6ec1145b4a57a51b9ab3fa (diff)
downloadopenshift-83378c05f3aaa67e36a3e7577784351fdb711c57.tar.gz
openshift-83378c05f3aaa67e36a3e7577784351fdb711c57.tar.bz2
openshift-83378c05f3aaa67e36a3e7577784351fdb711c57.tar.xz
openshift-83378c05f3aaa67e36a3e7577784351fdb711c57.zip
Only automatically restart if cluster is in yellow or green state
-rw-r--r--roles/installer_checkpoint/callback_plugins/installer_checkpoint.py4
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml160
-rw-r--r--roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml4
3 files changed, 97 insertions, 71 deletions
diff --git a/roles/installer_checkpoint/callback_plugins/installer_checkpoint.py b/roles/installer_checkpoint/callback_plugins/installer_checkpoint.py
index da7e7b1da..a38b95c1d 100644
--- a/roles/installer_checkpoint/callback_plugins/installer_checkpoint.py
+++ b/roles/installer_checkpoint/callback_plugins/installer_checkpoint.py
@@ -127,6 +127,10 @@ class CallbackModule(CallbackBase):
self._display.display(
'\tThis phase can be restarted by running: {}'.format(
phase_attributes[phase]['playbook']))
+ if 'message' in stats.custom['_run'][phase]:
+ self._display.display(
+ '\t{}'.format(
+ stats.custom['_run'][phase]['message']))
self._display.display("", screen_only=True)
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
index 6bce13d1d..879459cf6 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
@@ -1,91 +1,113 @@
---
-# Disable external communication for {{ _cluster_component }}
-- name: Disable external communication for logging-{{ _cluster_component }}
- oc_service:
- state: present
- name: "logging-{{ _cluster_component }}"
- namespace: "{{ openshift_logging_elasticsearch_namespace }}"
- selector:
- component: "{{ _cluster_component }}"
- provider: openshift
- connection: blocked
- labels:
- logging-infra: 'support'
- ports:
- - port: 9200
- targetPort: "restapi"
- when:
- - full_restart_cluster | bool
-
## get all pods for the cluster
- command: >
oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
register: _cluster_pods
-- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
- command: >
- oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
- register: _disable_output
- changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
+### Check for cluster state before making changes -- if its red then we don't want to continue
+- name: "Checking current health for {{ _es_node }} cluster"
+ shell: >
+ oc exec "{{ _cluster_pods.stdout.split(' ')[0] }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
+ register: _pod_status
when: _cluster_pods.stdout_lines | count > 0
-# Flush ES
-- name: "Flushing for logging-{{ _cluster_component }} cluster"
- command: >
- oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
- register: _flush_output
- changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
- when:
+- when:
+ - _pod_status.stdout is defined
+ - (_pod_status.stdout | from_json)['status'] in ['red']
+ block:
+ - name: Set Logging message to manually restart
+ run_once: true
+ set_stats:
+ data:
+ installer_phase_logging:
+ message: "Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
+
+ - debug: msg="Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
+
+- when: _pod_status.stdout is undefined or (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
+ block:
+ # Disable external communication for {{ _cluster_component }}
+ - name: Disable external communication for logging-{{ _cluster_component }}
+ oc_service:
+ state: present
+ name: "logging-{{ _cluster_component }}"
+ namespace: "{{ openshift_logging_elasticsearch_namespace }}"
+ selector:
+ component: "{{ _cluster_component }}"
+ provider: openshift
+ connection: blocked
+ labels:
+ logging-infra: 'support'
+ ports:
+ - port: 9200
+ targetPort: "restapi"
+ when:
+ - full_restart_cluster | bool
+
+ - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
+ command: >
+ oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
+ register: _disable_output
+ changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
+ when: _cluster_pods.stdout_lines | count > 0
+
+ # Flush ES
+ - name: "Flushing for logging-{{ _cluster_component }} cluster"
+ command: >
+ oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
+ register: _flush_output
+ changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
+ when:
- _cluster_pods.stdout_lines | count > 0
- full_restart_cluster | bool
-- command: >
- oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
- register: _cluster_dcs
+ - command: >
+ oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+ register: _cluster_dcs
-## restart all dcs for full restart
-- name: "Restart ES node {{ _es_node }}"
- include_tasks: restart_es_node.yml
- with_items: "{{ _cluster_dcs }}"
- loop_control:
- loop_var: _es_node
- when:
+ ## restart all dcs for full restart
+ - name: "Restart ES node {{ _es_node }}"
+ include_tasks: restart_es_node.yml
+ with_items: "{{ _cluster_dcs }}"
+ loop_control:
+ loop_var: _es_node
+ when:
- full_restart_cluster | bool
-## restart the node if it's dc is in the list of nodes to restart?
-- name: "Restart ES node {{ _es_node }}"
- include_tasks: restart_es_node.yml
- with_items: "{{ _restart_logging_nodes }}"
- loop_control:
- loop_var: _es_node
- when:
+ ## restart the node if it's dc is in the list of nodes to restart?
+ - name: "Restart ES node {{ _es_node }}"
+ include_tasks: restart_es_node.yml
+ with_items: "{{ _restart_logging_nodes }}"
+ loop_control:
+ loop_var: _es_node
+ when:
- not full_restart_cluster | bool
- _es_node in _cluster_dcs.stdout
-## we may need a new first pod to run against -- fetch them all again
-- command: >
- oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
- register: _cluster_pods
+ ## we may need a new first pod to run against -- fetch them all again
+ - command: >
+ oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
+ register: _cluster_pods
-- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
- command: >
- oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
- register: _enable_output
- changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
+ - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
+ command: >
+ oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
+ register: _enable_output
+ changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
-# Reenable external communication for {{ _cluster_component }}
-- name: Reenable external communication for logging-{{ _cluster_component }}
- oc_service:
- state: present
- name: "logging-{{ _cluster_component }}"
- namespace: "{{ openshift_logging_elasticsearch_namespace }}"
- selector:
- component: "{{ _cluster_component }}"
- provider: openshift
- labels:
- logging-infra: 'support'
- ports:
+ # Reenable external communication for {{ _cluster_component }}
+ - name: Reenable external communication for logging-{{ _cluster_component }}
+ oc_service:
+ state: present
+ name: "logging-{{ _cluster_component }}"
+ namespace: "{{ openshift_logging_elasticsearch_namespace }}"
+ selector:
+ component: "{{ _cluster_component }}"
+ provider: openshift
+ labels:
+ logging-infra: 'support'
+ ports:
- port: 9200
targetPort: "restapi"
- when:
+ when:
- full_restart_cluster | bool
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
index 6d0df40c8..fe15e40fd 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
@@ -26,12 +26,12 @@
- name: "Waiting for ES to be ready for {{ _es_node }}"
shell: >
- oc exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- {{ __es_local_curl }} https://localhost:9200/_cat/health | cut -d' ' -f4
+ oc exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
with_items: "{{ _pods.stdout.split(' ') }}"
loop_control:
loop_var: _pod
register: _pod_status
- until: _pod_status.stdout in ['green', 'yellow']
+ until: (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
retries: 60
delay: 5
changed_when: false