From 5d03e56e3befb1182834d9ca3020fc53b8590f3f Mon Sep 17 00:00:00 2001 From: juanvallejo Date: Tue, 6 Jun 2017 12:41:49 -0400 Subject: add pre-flight checks to ugrade path --- .../common/openshift-cluster/upgrades/init.yml | 2 + .../upgrades/pre/verify_health_checks.yml | 13 ++++ .../openshift_checks/disk_availability.py | 35 +++++++++++ .../test/disk_availability_test.py | 69 ++++++++++++++++++++-- 4 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml diff --git a/playbooks/common/openshift-cluster/upgrades/init.yml b/playbooks/common/openshift-cluster/upgrades/init.yml index 0f421928b..5fc290526 100644 --- a/playbooks/common/openshift-cluster/upgrades/init.yml +++ b/playbooks/common/openshift-cluster/upgrades/init.yml @@ -23,3 +23,5 @@ set_fact: os_firewall_use_firewalld: false when: "'Active: active' in service_iptables_status.stdout" + +- include: ./pre/verify_health_checks.yml diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml new file mode 100644 index 000000000..497709d25 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml @@ -0,0 +1,13 @@ +--- +- name: Verify Host Requirements + hosts: oo_all_hosts + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: upgrade + post_tasks: + - action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 283461294..b6a874dc9 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -35,6 +35,16 @@ class DiskAvailability(OpenShiftCheck): }, } + # recommended disk space for each location under an upgrade context + recommended_disk_upgrade_bytes = { + '/var': { + 'masters': 10 * 10**9, + 'nodes': 5 * 10 ** 9, + 'etcd': 5 * 10 ** 9, + }, + } + + @classmethod def is_active(self): """Skip hosts that do not have recommended disk space requirements.""" group_names = self.get_var("group_names", default=[]) @@ -80,9 +90,34 @@ class DiskAvailability(OpenShiftCheck): config_bytes = max(config.get(name, 0) for name in group_names) * 10**9 recommended_bytes = config_bytes or recommended_bytes + # if an "upgrade" context is set, update the minimum disk requirement + # as this signifies an in-place upgrade - the node might have the + # required total disk space, but some of that space may already be + # in use by the existing OpenShift deployment. + context = self.get_var("r_openshift_health_checker_playbook_context", default="") + if context == "upgrade": + recommended_upgrade_paths = self.recommended_disk_upgrade_bytes.get(path, {}) + if recommended_upgrade_paths: + recommended_bytes = config_bytes or max(recommended_upgrade_paths.get(name, 0) + for name in group_names) + if free_bytes < recommended_bytes: free_gb = float(free_bytes) / 10**9 recommended_gb = float(recommended_bytes) / 10**9 + msg = ( + 'Available disk space in "{}" ({:.1f} GB) ' + 'is below minimum recommended ({:.1f} GB)' + ).format(path, free_gb, recommended_gb) + + # warn if check failed under an "upgrade" context + # due to limits imposed by the user config + if config_bytes and context == "upgrade": + msg += ('\n\nMake sure to account for decreased disk space during an upgrade\n' + 'due to an existing OpenShift deployment. Please check the value of\n' + ' openshift_check_min_host_disk_gb={}\n' + 'in your Ansible inventory, and lower the recommended disk space availability\n' + 'if necessary for this upgrade.').format(config_bytes) + return { 'failed': True, 'msg': ( diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index e98d02c58..bd91d32a3 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -97,8 +97,9 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [ +@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [ ( + 'test with no space available', ['masters'], 0, [{ @@ -108,6 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['0.0 GB'], ), ( + 'test with a higher configured required value', ['masters'], 100, # set a higher threshold [{ @@ -117,6 +119,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['100.0 GB'], ), ( + 'test with 1GB available, but "0" GB space requirement', ['nodes'], 0, [{ @@ -126,6 +129,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['1.0 GB'], ), ( + 'test with no space available, but "0" GB space requirement', ['etcd'], 0, [{ @@ -135,16 +139,17 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['0.0 GB'], ), ( + 'test with enough space for a node, but not for a master', ['nodes', 'masters'], 0, [{ 'mount': '/', - # enough space for a node, not enough for a master 'size_available': 15 * 10**9 + 1, }], ['15.0 GB'], ), ( + 'test failure with enough space on "/", but not enough on "/var"', ['etcd'], 0, [{ @@ -158,8 +163,8 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib }], ['0.0 GB'], ), -]) -def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words): +], ids=lambda argval: argval[0]) +def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words): task_vars = dict( group_names=group_names, openshift_check_min_host_disk_gb=configured_min, @@ -170,7 +175,61 @@ def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible assert result['failed'] for word in 'below recommended'.split() + extra_words: - assert word in result['msg'] + assert word in result.get('msg', '') + + +@pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [ + ( + 'test without enough space for master under "upgrade" context', + ['nodes', 'masters'], + "upgrade", + [{ + 'mount': '/', + 'size_available': 1 * 10**9 + 1, + 'size_total': 21 * 10**9 + 1, + }], + True, + ["1.0 GB"], + ), + ( + 'test with enough space for master under "upgrade" context', + ['nodes', 'masters'], + "upgrade", + [{ + 'mount': '/', + 'size_available': 10 * 10**9 + 1, + 'size_total': 21 * 10**9 + 1, + }], + False, + [], + ), + ( + 'test with not enough space for master, and non-upgrade context', + ['nodes', 'masters'], + "health", + [{ + 'mount': '/', + # not enough space for a master, + # "health" context should not lower requirement + 'size_available': 20 * 10**9 + 1, + }], + True, + ["20.0 GB", "below minimum"], + ), +], ids=lambda argval: argval[0]) +def test_min_required_space_changes_with_upgrade_context(name, group_names, context, ansible_mounts, failed, extra_words): + task_vars = dict( + r_openshift_health_checker_playbook_context=context, + group_names=group_names, + ansible_mounts=ansible_mounts, + ) + + check = DiskAvailability(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert result.get("failed", False) == failed + for word in extra_words: + assert word in result.get('msg', '') def fake_execute_module(*args): -- cgit v1.2.3 From 8af18393771ed5679932b05e34fc1d941f7dd51e Mon Sep 17 00:00:00 2001 From: juanvallejo Date: Wed, 26 Jul 2017 19:04:08 -0400 Subject: fixes after rebasing with #4485 --- roles/openshift_health_checker/openshift_checks/disk_availability.py | 1 - roles/openshift_health_checker/test/disk_availability_test.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index b6a874dc9..39ac0e4ec 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -44,7 +44,6 @@ class DiskAvailability(OpenShiftCheck): }, } - @classmethod def is_active(self): """Skip hosts that do not have recommended disk space requirements.""" group_names = self.get_var("group_names", default=[]) diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index bd91d32a3..5720eeacf 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -224,8 +224,8 @@ def test_min_required_space_changes_with_upgrade_context(name, group_names, cont ansible_mounts=ansible_mounts, ) - check = DiskAvailability(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + check = DiskAvailability(fake_execute_module, task_vars) + result = check.run() assert result.get("failed", False) == failed for word in extra_words: -- cgit v1.2.3