diff options
-rw-r--r-- | roles/openshift_health_checker/openshift_checks/etcd_volume.py | 68 | ||||
-rw-r--r-- | roles/openshift_health_checker/test/etcd_volume_test.py | 26 |
2 files changed, 46 insertions, 48 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index 00e240231..ad88ae44d 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -1,57 +1,55 @@ -""" -Ansible module for warning about etcd volume size past a defined threshold. -""" - from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var class EtcdVolume(OpenShiftCheck): - """Ensure disk size for an etcd host does not exceed a defined limit""" + """Ensures etcd storage usage does not exceed a given threshold.""" name = "etcd_volume" tags = ["etcd", "health"] - etcd_default_size_limit_percent = 0.9 + # pylint: disable=invalid-name + default_etcd_device_usage_threshold_percent = 90 + # where to find etcd data, higher priority first. + supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] - def run(self, tmp, task_vars): - ansible_mounts = get_var(task_vars, "ansible_mounts") + @classmethod + def is_active(cls, task_vars): + # TODO: only execute this check on hosts in the 'etcd' group? + # Maybe also 'masters' if there are no standalone etcd hosts? 
+ return super(EtcdVolume, cls).is_active(task_vars) - etcd_mount_path = self._get_etcd_mount_path(ansible_mounts) - etcd_disk_size_available = float(etcd_mount_path["size_available"]) - etcd_disk_size_total = float(etcd_mount_path["size_total"]) - etcd_disk_size_used = etcd_disk_size_total - etcd_disk_size_available + def run(self, tmp, task_vars): + mount_info = self._etcd_mount_info(task_vars) + available = mount_info["size_available"] + total = mount_info["size_total"] + used = total - available - size_limit_percent = get_var( + threshold = get_var( task_vars, - "etcd_disk_size_limit_percent", - default=self.etcd_default_size_limit_percent + "etcd_device_usage_threshold_percent", + default=self.default_etcd_device_usage_threshold_percent ) - if etcd_disk_size_used / etcd_disk_size_total > size_limit_percent: - msg = ("Current etcd volume usage ({actual:.2f} GB) for the volume \"{volume}\" " - "is greater than the storage limit ({limit:.2f} GB).") - msg = msg.format( - actual=self._to_gigabytes(etcd_disk_size_used), - volume=etcd_mount_path["mount"], - limit=self._to_gigabytes(size_limit_percent * etcd_disk_size_total), + used_percent = 100.0 * used / total + + if used_percent > threshold: + device = mount_info.get("device", "unknown") + mount = mount_info.get("mount", "unknown") + msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). 
Device: {}, mount: {}.".format( + used_percent, threshold, device, mount ) return {"failed": True, "msg": msg} return {"changed": False} - @staticmethod - def _get_etcd_mount_path(ansible_mounts): - supported_mnt_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] - available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + def _etcd_mount_info(self, task_vars): + ansible_mounts = get_var(task_vars, "ansible_mounts") + mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts} - for path in supported_mnt_paths: - if path in available_mnts: - return available_mnts[path] + for path in self.supported_mount_paths: + if path in mounts: + return mounts[path] - paths = ', '.join(sorted(available_mnts)) or 'none' - msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths) + paths = ', '.join(sorted(mounts)) or 'none' + msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths) raise OpenShiftCheckException(msg) - - @staticmethod - def _to_gigabytes(byte_size): - return float(byte_size) / 10.0**9 diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py index ff8d0d8d7..917045526 100644 --- a/roles/openshift_health_checker/test/etcd_volume_test.py +++ b/roles/openshift_health_checker/test/etcd_volume_test.py @@ -16,7 +16,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: check.run(tmp=None, task_vars=task_vars) - for word in 'determine available disk'.split() + extra_words: + for word in 'Unable to find etcd storage mount point'.split() + extra_words: assert word in str(excinfo.value) @@ -69,12 +69,12 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): ]) def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts): task_vars = dict( - etcd_disk_size_limit_percent=size_limit, + etcd_device_usage_threshold_percent=size_limit, 
ansible_mounts=ansible_mounts, ) - if task_vars["etcd_disk_size_limit_percent"] is None: - task_vars.pop("etcd_disk_size_limit_percent") + if task_vars["etcd_device_usage_threshold_percent"] is None: + task_vars.pop("etcd_device_usage_threshold_percent") check = EtcdVolume(execute_module=fake_execute_module) result = check.run(tmp=None, task_vars=task_vars) @@ -92,25 +92,25 @@ def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts): 'size_available': 1 * 10**9, 'size_total': 100 * 10**9, }], - ['90.00 GB'], + ['99.0%'], ), ( - 0.7, + 70.0, [{ 'mount': '/', 'size_available': 1 * 10**6, 'size_total': 5 * 10**9, }], - ['3.50 GB'], + ['100.0%'], ), ( - 0.4, + 40.0, [{ 'mount': '/', 'size_available': 2 * 10**9, 'size_total': 6 * 10**9, }], - ['2.40 GB'], + ['66.7%'], ), ( None, @@ -125,17 +125,17 @@ def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts): 'size_available': 1 * 10**9, 'size_total': 20 * 10**9, }], - ['18.00 GB'], + ['95.0%'], ), ]) def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, extra_words): task_vars = dict( - etcd_disk_size_limit_percent=size_limit_percent, + etcd_device_usage_threshold_percent=size_limit_percent, ansible_mounts=ansible_mounts, ) - if task_vars["etcd_disk_size_limit_percent"] is None: - task_vars.pop("etcd_disk_size_limit_percent") + if task_vars["etcd_device_usage_threshold_percent"] is None: + task_vars.pop("etcd_device_usage_threshold_percent") check = EtcdVolume(execute_module=fake_execute_module) result = check.run(tmp=None, task_vars=task_vars) |