diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2017-09-24 11:46:04 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-24 11:46:04 -0700 |
commit | 02485be8fe554e03bf07f0650f9ff36398679722 (patch) | |
tree | f4394f8c81eef0d588ad70c723babdf6059a730f | |
parent | 5632b266c6d44f371fe7d36a0e367c462cf7a701 (diff) | |
parent | 775128cc52d8295fe4cf08211ab53730d101692c (diff) | |
download | openshift-02485be8fe554e03bf07f0650f9ff36398679722.tar.gz openshift-02485be8fe554e03bf07f0650f9ff36398679722.tar.bz2 openshift-02485be8fe554e03bf07f0650f9ff36398679722.tar.xz openshift-02485be8fe554e03bf07f0650f9ff36398679722.zip |
Merge pull request #5495 from abutcher/ca-redeploy-expired-etcd
Automatic merge from submit-queue
Improve CA redeploy restart logic
Expired etcd certificates require special casing around restarts in the certificate redeploy playbooks. When etcd certificates are expired we can't restart masters or nodes. We also can't simply restart etcd because peers also had expired certificates so we must start/stop etcd when we detect expired etcd certificates.
`openshift-ca.yml`:
* No longer restart master services when etcd certificates were previously expired.
* No longer restart node services when master or etcd certificates were previously expired.
`etcd-ca.yml`:
* No longer restart master services when etcd certificates were previously expired.
Tested using [gen_expired_tls.sh](https://gist.github.com/abutcher/bdd20b9d582675d89fb22658689c49e4) on one of my master/etcd hosts to ensure that restart logic changes caused us to skip the right restarts and do a full start/stop of etcd in the `redeploy-certificates.yml` and `redeploy-etcd-certificates.yml` playbooks.
Note: When this happens with a cluster and you want to replace all certificates, you can run: `redeploy-etcd-ca.yml`, `redeploy-openshift-ca.yml` (which will both skip restarts) and then run `redeploy-certificates.yml` which will now be able to full/stop start etcd.
6 files changed, 100 insertions, 31 deletions
diff --git a/playbooks/byo/openshift-cluster/redeploy-certificates.yml b/playbooks/byo/openshift-cluster/redeploy-certificates.yml index a3894e243..073ded6e0 100644 --- a/playbooks/byo/openshift-cluster/redeploy-certificates.yml +++ b/playbooks/byo/openshift-cluster/redeploy-certificates.yml @@ -7,6 +7,10 @@ tags: - always +- include: ../../common/openshift-cluster/redeploy-certificates/check-expiry.yml + vars: + g_check_expiry_hosts: 'oo_etcd_to_config' + - include: ../../common/openshift-cluster/redeploy-certificates/etcd.yml - include: ../../common/openshift-cluster/redeploy-certificates/masters.yml @@ -14,6 +18,8 @@ - include: ../../common/openshift-cluster/redeploy-certificates/nodes.yml - include: ../../common/openshift-etcd/restart.yml + vars: + g_etcd_certificates_expired: "{{ ('expired' in (hostvars | oo_select_keys(groups['etcd']) | oo_collect('check_results.check_results.etcd') | oo_collect('health'))) | bool }}" - include: ../../common/openshift-master/restart.yml diff --git a/playbooks/byo/openshift-cluster/redeploy-etcd-certificates.yml b/playbooks/byo/openshift-cluster/redeploy-etcd-certificates.yml index 8516baee8..0f86eb997 100644 --- a/playbooks/byo/openshift-cluster/redeploy-etcd-certificates.yml +++ b/playbooks/byo/openshift-cluster/redeploy-etcd-certificates.yml @@ -7,8 +7,14 @@ tags: - always +- include: ../../common/openshift-cluster/redeploy-certificates/check-expiry.yml + vars: + g_check_expiry_hosts: 'oo_etcd_to_config' + - include: ../../common/openshift-cluster/redeploy-certificates/etcd.yml - include: ../../common/openshift-etcd/restart.yml + vars: + g_etcd_certificates_expired: "{{ ('expired' in (hostvars | oo_select_keys(groups['etcd']) | oo_collect('check_results.check_results.etcd') | oo_collect('health'))) | bool }}" - include: ../../common/openshift-master/restart.yml diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml b/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml new file mode 100644 index 000000000..4a9fbf7eb --- /dev/null +++ b/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml @@ -0,0 +1,12 @@ +--- +- name: Check cert expirys + hosts: "{{ g_check_expiry_hosts }}" + vars: + openshift_certificate_expiry_show_all: yes + roles: + # Sets 'check_results' per host which contains health status for + # etcd, master and node certificates. We will use 'check_results' + # to determine if any certificates were expired prior to running + # this playbook. Service restarts will be skipped if any + # certificates were previously expired. + - role: openshift_certificate_expiry diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml index 58bbcc658..3a8e32ed1 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml @@ -153,13 +153,19 @@ changed_when: false - include: ../../openshift-master/restart.yml - # Do not restart masters when master certificates were previously expired. - when: ('expired' not in hostvars - | oo_select_keys(groups['oo_masters_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"})) - and - ('expired' not in hostvars - | oo_select_keys(groups['oo_masters_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"})) + # Do not restart masters when master or etcd certificates were previously expired. + when: + # masters + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"})) + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"})) + # etcd + - ('expired' not in (hostvars + | oo_select_keys(groups['etcd']) + | oo_collect('check_results.check_results.etcd') + | oo_collect('health'))) diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml index 089ae6bbc..b54acae6c 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml @@ -7,7 +7,7 @@ when: not openshift.common.version_gte_3_2_or_1_2 | bool - name: Check cert expirys - hosts: oo_nodes_to_config:oo_masters_to_config + hosts: oo_nodes_to_config:oo_masters_to_config:oo_etcd_to_config vars: openshift_certificate_expiry_show_all: yes roles: @@ -209,16 +209,22 @@ with_items: "{{ client_users }}" - include: ../../openshift-master/restart.yml - # Do not restart masters when master certificates were previously expired. - when: ('expired' not in hostvars - | oo_select_keys(groups['oo_masters_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"})) - and - ('expired' not in hostvars - | oo_select_keys(groups['oo_masters_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"})) + # Do not restart masters when master or etcd certificates were previously expired. + when: + # masters + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"})) + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"})) + # etcd + - ('expired' not in (hostvars + | oo_select_keys(groups['etcd']) + | oo_collect('check_results.check_results.etcd') + | oo_collect('health'))) - name: Distribute OpenShift CA certificate to nodes hosts: oo_nodes_to_config @@ -268,13 +274,28 @@ changed_when: false - include: ../../openshift-node/restart.yml - # Do not restart nodes when node certificates were previously expired. - when: ('expired' not in hostvars - | oo_select_keys(groups['oo_nodes_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"})) - and - ('expired' not in hostvars - | oo_select_keys(groups['oo_nodes_to_config']) - | oo_collect('check_results.check_results.ocp_certs') - | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"})) + # Do not restart nodes when node, master or etcd certificates were previously expired. + when: + # nodes + - ('expired' not in hostvars + | oo_select_keys(groups['oo_nodes_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"})) + - ('expired' not in hostvars + | oo_select_keys(groups['oo_nodes_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"})) + # masters + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"})) + - ('expired' not in hostvars + | oo_select_keys(groups['oo_masters_to_config']) + | oo_collect('check_results.check_results.ocp_certs') + | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"})) + # etcd + - ('expired' not in (hostvars + | oo_select_keys(groups['etcd']) + | oo_collect('check_results.check_results.etcd') + | oo_collect('health'))) diff --git a/playbooks/common/openshift-etcd/restart.yml b/playbooks/common/openshift-etcd/restart.yml index af1ef245a..5eaea5ae8 100644 --- a/playbooks/common/openshift-etcd/restart.yml +++ b/playbooks/common/openshift-etcd/restart.yml @@ -7,3 +7,21 @@ service: name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}" state: restarted + when: + - not g_etcd_certificates_expired | default(false) | bool + +- name: Restart etcd + hosts: oo_etcd_to_config + tasks: + - name: stop etcd + service: + name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}" + state: stopped + when: + - g_etcd_certificates_expired | default(false) | bool + - name: start etcd + service: + name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}" + state: started + when: + - g_etcd_certificates_expired | default(false) | bool |