Merge pull request #4372 from juanvallejo/jvallejo/verify-disk-memory-before-upgrade

Add health checks to upgrade playbook
author: Rodolfo Carvalho <rhcarvalho@gmail.com> 2017-07-28 13:40:54 +0200
committer: GitHub <noreply@github.com> 2017-07-28 13:40:54 +0200
commit: d0761aee7107dffa4803f39b2aaecb660d767752 (patch)
tree: 22246d0a0c335b48c65207b6ad5a064b3eee7b77
parent: 4a442d87006bdd535eb5035d122a6b5226555da2 (diff)
parent: 8af18393771ed5679932b05e34fc1d941f7dd51e (diff)
download: openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.gz
openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.bz2
openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.xz
openshift-d0761aee7107dffa4803f39b2aaecb660d767752.zip
4 files changed, 113 insertions, 5 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/init.yml b/playbooks/common/openshift-cluster/upgrades/init.yml
index 0f421928b..5fc290526 100644
--- a/playbooks/common/openshift-cluster/upgrades/init.yml
+++ b/playbooks/common/openshift-cluster/upgrades/init.yml
@@ -23,3 +23,5 @@
     set_fact:
       os_firewall_use_firewalld: false
     when: "'Active: active' in service_iptables_status.stdout"
+
+- include: ./pre/verify_health_checks.yml
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml
new file mode 100644
index 000000000..497709d25
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml
@@ -0,0 +1,13 @@
+---
+- name: Verify Host Requirements
+  hosts: oo_all_hosts
+  roles:
+  - openshift_health_checker
+  vars:
+  - r_openshift_health_checker_playbook_context: upgrade
+  post_tasks:
+  - action: openshift_health_check
+    args:
+      checks:
+      - disk_availability
+      - memory_availability
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 283461294..39ac0e4ec 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -35,6 +35,15 @@ class DiskAvailability(OpenShiftCheck):
         },
     }
 
+    # recommended disk space for each location under an upgrade context
+    recommended_disk_upgrade_bytes = {
+        '/var': {
+            'masters': 10 * 10**9,
+            'nodes': 5 * 10 ** 9,
+            'etcd': 5 * 10 ** 9,
+        },
+    }
+
     def is_active(self):
         """Skip hosts that do not have recommended disk space requirements."""
         group_names = self.get_var("group_names", default=[])
@@ -80,9 +89,34 @@ class DiskAvailability(OpenShiftCheck):
             config_bytes = max(config.get(name, 0) for name in group_names) * 10**9
             recommended_bytes = config_bytes or recommended_bytes
 
+            # if an "upgrade" context is set, update the minimum disk requirement
+            # as this signifies an in-place upgrade - the node might have the
+            # required total disk space, but some of that space may already be
+            # in use by the existing OpenShift deployment.
+            context = self.get_var("r_openshift_health_checker_playbook_context", default="")
+            if context == "upgrade":
+                recommended_upgrade_paths = self.recommended_disk_upgrade_bytes.get(path, {})
+                if recommended_upgrade_paths:
+                    recommended_bytes = config_bytes or max(recommended_upgrade_paths.get(name, 0)
+                                                            for name in group_names)
+
             if free_bytes < recommended_bytes:
                 free_gb = float(free_bytes) / 10**9
                 recommended_gb = float(recommended_bytes) / 10**9
+                msg = (
+                    'Available disk space in "{}" ({:.1f} GB) '
+                    'is below minimum recommended ({:.1f} GB)'
+                ).format(path, free_gb, recommended_gb)
+
+                # warn if check failed under an "upgrade" context
+                # due to limits imposed by the user config
+                if config_bytes and context == "upgrade":
+                    msg += ('\n\nMake sure to account for decreased disk space during an upgrade\n'
+                            'due to an existing OpenShift deployment. Please check the value of\n'
+                            '  openshift_check_min_host_disk_gb={}\n'
+                            'in your Ansible inventory, and lower the recommended disk space availability\n'
+                            'if necessary for this upgrade.').format(config_bytes)
+
                 return {
                     'failed': True,
                     'msg': (
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index e98d02c58..5720eeacf 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -97,8 +97,9 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
     assert not result.get('failed', False)
 
 
-@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [
+@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [
     (
+        'test with no space available',
         ['masters'],
         0,
         [{
@@ -108,6 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         ['0.0 GB'],
     ),
     (
+        'test with a higher configured required value',
         ['masters'],
         100,  # set a higher threshold
         [{
@@ -117,6 +119,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         ['100.0 GB'],
     ),
     (
+        'test with 1GB available, but "0" GB space requirement',
         ['nodes'],
         0,
         [{
@@ -126,6 +129,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         ['1.0 GB'],
     ),
     (
+        'test with no space available, but "0" GB space requirement',
         ['etcd'],
         0,
         [{
@@ -135,16 +139,17 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         ['0.0 GB'],
     ),
     (
+        'test with enough space for a node, but not for a master',
         ['nodes', 'masters'],
         0,
         [{
             'mount': '/',
-            # enough space for a node, not enough for a master
             'size_available': 15 * 10**9 + 1,
         }],
         ['15.0 GB'],
     ),
     (
+        'test failure with enough space on "/", but not enough on "/var"',
         ['etcd'],
         0,
         [{
@@ -158,8 +163,8 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         }],
         ['0.0 GB'],
     ),
-])
-def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words):
+], ids=lambda argval: argval[0])
+def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words):
     task_vars = dict(
         group_names=group_names,
         openshift_check_min_host_disk_gb=configured_min,
@@ -170,7 +175,61 @@ def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible
 
     assert result['failed']
     for word in 'below recommended'.split() + extra_words:
-        assert word in result['msg']
+        assert word in result.get('msg', '')
+
+
+@pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [
+    (
+        'test without enough space for master under "upgrade" context',
+        ['nodes', 'masters'],
+        "upgrade",
+        [{
+            'mount': '/',
+            'size_available': 1 * 10**9 + 1,
+            'size_total': 21 * 10**9 + 1,
+        }],
+        True,
+        ["1.0 GB"],
+    ),
+    (
+        'test with enough space for master under "upgrade" context',
+        ['nodes', 'masters'],
+        "upgrade",
+        [{
+            'mount': '/',
+            'size_available': 10 * 10**9 + 1,
+            'size_total': 21 * 10**9 + 1,
+        }],
+        False,
+        [],
+    ),
+    (
+        'test with not enough space for master, and non-upgrade context',
+        ['nodes', 'masters'],
+        "health",
+        [{
+            'mount': '/',
+            # not enough space for a master,
+            # "health" context should not lower requirement
+            'size_available': 20 * 10**9 + 1,
+        }],
+        True,
+        ["20.0 GB", "below minimum"],
+    ),
+], ids=lambda argval: argval[0])
+def test_min_required_space_changes_with_upgrade_context(name, group_names, context, ansible_mounts, failed, extra_words):
+    task_vars = dict(
+        r_openshift_health_checker_playbook_context=context,
+        group_names=group_names,
+        ansible_mounts=ansible_mounts,
+    )
+
+    check = DiskAvailability(fake_execute_module, task_vars)
+    result = check.run()
+
+    assert result.get("failed", False) == failed
+    for word in extra_words:
+        assert word in result.get('msg', '')
 
 
 def fake_execute_module(*args):
author	Rodolfo Carvalho <rhcarvalho@gmail.com>	2017-07-28 13:40:54 +0200
committer	GitHub <noreply@github.com>	2017-07-28 13:40:54 +0200
commit	d0761aee7107dffa4803f39b2aaecb660d767752 (patch)
tree	22246d0a0c335b48c65207b6ad5a064b3eee7b77
parent	4a442d87006bdd535eb5035d122a6b5226555da2 (diff)
parent	8af18393771ed5679932b05e34fc1d941f7dd51e (diff)
download	openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.gz openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.bz2 openshift-d0761aee7107dffa4803f39b2aaecb660d767752.tar.xz openshift-d0761aee7107dffa4803f39b2aaecb660d767752.zip