From ad47ff1df5147d35572ee3ae08f0fd74fade5e33 Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Thu, 22 Sep 2016 14:31:00 -0300 Subject: Allow a couple of retries when unscheduling/rescheduling nodes in upgrade. This can fail with a transient "object has been modified" error asking you to retry your changes on the latest version of the object. Allow up to three retries to see if we can get the change to take effect. --- .../common/openshift-cluster/upgrades/upgrade_nodes.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 917c95e29..9b572dcdf 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -32,6 +32,12 @@ {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade + # NOTE: There is a transient "object has been modified" error here, allow a couple + # retries for a more reliable upgrade. + register: node_unsched + until: node_unsched.rc == 0 + retries: 3 + delay: 1 - name: Evacuate Node for Kubelet upgrade command: > @@ -61,3 +67,9 @@ {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool + register: node_sched + until: node_sched.rc == 0 + retries: 3 + delay: 1 + + -- cgit v1.2.3