diff options
author | OpenShift Bot <eparis+openshiftbot@redhat.com> | 2017-06-06 16:52:47 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-06 16:52:47 -0400 |
commit | f734724fbcac6bdd2e9bb9f69058385768ef0cd0 (patch) | |
tree | 2b410928549eaa43c4323adc703a44bdaf5a962e | |
parent | 67da7e2e4699f9a226a436218723749ebc14ace1 (diff) | |
parent | 2d8e3d2b28ce19569c76c56102e9639a6f26b0c2 (diff) | |
download | openshift-f734724fbcac6bdd2e9bb9f69058385768ef0cd0.tar.gz openshift-f734724fbcac6bdd2e9bb9f69058385768ef0cd0.tar.bz2 openshift-f734724fbcac6bdd2e9bb9f69058385768ef0cd0.tar.xz openshift-f734724fbcac6bdd2e9bb9f69058385768ef0cd0.zip |
Merge pull request #4252 from sdodson/tolerate-node-upgrade-failure
Merged by openshift-bot
-rw-r--r-- | inventory/byo/hosts.origin.example | 29 | ||||
-rw-r--r-- | inventory/byo/hosts.ose.example | 29 | ||||
-rw-r--r-- | playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml | 4 | ||||
-rw-r--r-- | playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml | 2 |
4 files changed, 61 insertions, 3 deletions
diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 206ec06c3..b2490638b 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -799,6 +799,35 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal. There is one set of variables for normal nodes and one set for +# nodes that are part of the control plane, as the number of hosts may differ +# in those two groups. +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +#openshift_upgrade_control_plane_nodes_serial=1 +#openshift_upgrade_control_plane_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster returns to full capacity. 
+# +# The failure percentage must exceed this value; this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# whereas this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index 4f777c330..67d53b22d 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -795,6 +795,35 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal. There is one set of variables for normal nodes and one set for +# nodes that are part of the control plane, as the number of hosts may differ +# in those two groups. +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +#openshift_upgrade_control_plane_nodes_serial=1 +#openshift_upgrade_control_plane_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster returns to full capacity. 
+# +# The failure percentage must exceed this value; this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# whereas this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index e10c4c540..b980909eb 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -247,8 +247,8 @@ hosts: oo_masters_to_config:&oo_nodes_to_upgrade # This var must be set with -e on invocation, as it is not a per-host inventory var # and is evaluated early. Values such as "20%" can also be used. - serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" - any_errors_fatal: true + serial: "{{ openshift_upgrade_control_plane_nodes_serial | default(1) }}" + max_fail_percentage: "{{ openshift_upgrade_control_plane_nodes_max_fail_percentage | default(0) }}" pre_tasks: - name: Load lib_openshift modules diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 4d455fe0a..91dbc2cd4 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -4,7 +4,7 @@ # This var must be set with -e on invocation, as it is not a per-host inventory var # and is evaluated early. Values such as "20%" can also be used. serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" - any_errors_fatal: true + max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" pre_tasks: - name: Load lib_openshift modules |