diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2017-09-22 21:02:35 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-22 21:02:35 -0700 |
commit | e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d (patch) | |
tree | a286bae582ca912e1b7badd1052f5d2ad52ec91e | |
parent | d407d4fc3cb1d76d7fef52088e2459a420952c16 (diff) | |
parent | 9698f76b641b7cc5964d2f4f318ea71702aa2245 (diff) | |
download | openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.gz openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.bz2 openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.xz openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.zip |
Merge pull request #5491 from sosiouxme/20170920-diagnostics-check
Automatic merge from submit-queue
health checks: add diagnostics check
Adds a health check that runs `oc adm diagnostics` with each individual diagnostic.
Also, moved `is_first_master` method into superclass for reuse. And look at `oo_first_master` and `ansible_host` instead of `masters` and `ansible_ssh_host`.
7 files changed, 138 insertions, 20 deletions
diff --git a/roles/openshift_health_checker/library/ocutil.py b/roles/openshift_health_checker/library/ocutil.py index 2e60735d6..c72f4c5b3 100644 --- a/roles/openshift_health_checker/library/ocutil.py +++ b/roles/openshift_health_checker/library/ocutil.py @@ -40,18 +40,17 @@ def main(): module = AnsibleModule( argument_spec=dict( - namespace=dict(type="str", required=True), + namespace=dict(type="str", required=False), config_file=dict(type="str", required=True), cmd=dict(type="str", required=True), extra_args=dict(type="list", default=[]), ), ) - cmd = [ - locate_oc_binary(), - '--config', module.params["config_file"], - '-n', module.params["namespace"], - ] + shlex.split(module.params["cmd"]) + cmd = [locate_oc_binary(), '--config', module.params["config_file"]] + if module.params["namespace"]: + cmd += ['-n', module.params["namespace"]] + cmd += shlex.split(module.params["cmd"]) + module.params["extra_args"] failed = True try: diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 28cb53cc5..ce05b44a4 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -13,6 +13,7 @@ from importlib import import_module from ansible.module_utils import six from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin +from ansible.module_utils.six import string_types from ansible.plugins.filter.core import to_bool as ansible_to_bool @@ -110,6 +111,11 @@ class OpenShiftCheck(object): """Returns true if this check applies to the ansible-playbook run.""" return True + def is_first_master(self): + """Determine if running on first master. Returns: bool""" + masters = self.get_var("groups", "oo_first_master", default=None) or [None] + return masters[0] == self.get_var("ansible_host") + @abstractmethod def run(self): """Executes a check against a host and returns a result hash similar to Ansible modules. @@ -283,6 +289,17 @@ class OpenShiftCheck(object): )) @staticmethod + def normalize(name_list): + """Return a clean list of names. + + The input may be a comma-separated string or a sequence. Leading and + trailing whitespace characters are removed. Empty items are discarded. + """ + if isinstance(name_list, string_types): + name_list = name_list.split(',') + return [name.strip() for name in name_list if name.strip()] + + @staticmethod def get_major_minor_version(openshift_image_tag): """Parse and return the deployed version of OpenShift as a tuple.""" if openshift_image_tag and openshift_image_tag[0] == 'v': diff --git a/roles/openshift_health_checker/openshift_checks/diagnostics.py b/roles/openshift_health_checker/openshift_checks/diagnostics.py new file mode 100644 index 000000000..1cfdc1129 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/diagnostics.py @@ -0,0 +1,62 @@ +""" +A check to run relevant diagnostics via `oc adm diagnostics`. +""" + +import os + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException + + +DIAGNOSTIC_LIST = ( + "AggregatedLogging ClusterRegistry ClusterRoleBindings ClusterRoles " + "ClusterRouter DiagnosticPod NetworkCheck" +).split() + + +class DiagnosticCheck(OpenShiftCheck): + """A check to run relevant diagnostics via `oc adm diagnostics`.""" + + name = "diagnostics" + tags = ["health"] + + def is_active(self): + return super(DiagnosticCheck, self).is_active() and self.is_first_master() + + def run(self): + if self.exec_diagnostic("ConfigContexts"): + # only run the other diagnostics if that one succeeds (otherwise, all will fail) + diagnostics = self.get_var("openshift_check_diagnostics", default=DIAGNOSTIC_LIST) + for diagnostic in self.normalize(diagnostics): + self.exec_diagnostic(diagnostic) + return {} + + def exec_diagnostic(self, diagnostic): + """ + Execute an 'oc adm diagnostics' command on the remote host. + Raises OcNotFound or registers OcDiagFailed. + Returns True on success or False on failure (non-zero rc). + """ + config_base = self.get_var("openshift.common.config_base") + args = { + "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), + "cmd": "adm diagnostics", + "extra_args": [diagnostic], + } + + result = self.execute_module("ocutil", args, save_as_name=diagnostic + ".failure.json") + self.register_file(diagnostic + ".txt", result['result']) + if result.get("failed"): + if result['result'] == '[Errno 2] No such file or directory': + raise OpenShiftCheckException( + "OcNotFound", + "This host is supposed to be a master but does not have the `oc` command where expected.\n" + "Has an installation been run on this host yet?" + ) + + self.register_failure(OpenShiftCheckException( + 'OcDiagFailed', + 'The {diag} diagnostic reported an error:\n' + '{error}'.format(diag=diagnostic, error=result['result']) + )) + return False + return True diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index e5d93ff3f..79955cb2f 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -16,7 +16,7 @@ class EtcdVolume(OpenShiftCheck): def is_active(self): etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] - is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts + is_etcd_host = self.get_var("ansible_host") in etcd_hosts return super(EtcdVolume, self).is_active() and is_etcd_host def run(self): diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 06bdfebf6..05ba73ca1 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -30,14 +30,6 @@ class LoggingCheck(OpenShiftCheck): logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False) return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master() - def is_first_master(self): - """Determine if running on first master. Returns: bool""" - # Note: It would be nice to use membership in oo_first_master group, however for now it - # seems best to avoid requiring that setup and just check this is the first master. - hostname = self.get_var("ansible_ssh_host") or [None] - masters = self.get_var("groups", "masters", default=None) or [None] - return masters[0] == hostname - def run(self): return {} diff --git a/roles/openshift_health_checker/test/diagnostics_test.py b/roles/openshift_health_checker/test/diagnostics_test.py new file mode 100644 index 000000000..800889fa7 --- /dev/null +++ b/roles/openshift_health_checker/test/diagnostics_test.py @@ -0,0 +1,50 @@ +import pytest + +from openshift_checks.diagnostics import DiagnosticCheck, OpenShiftCheckException + + +@pytest.fixture() +def task_vars(): + return dict( + openshift=dict( + common=dict(config_base="/etc/origin/") + ) + ) + + +def test_module_succeeds(task_vars): + check = DiagnosticCheck(lambda *_: {"result": "success"}, task_vars) + check.is_first_master = lambda: True + assert check.is_active() + check.exec_diagnostic("spam") + assert not check.failures + + +def test_oc_not_there(task_vars): + def exec_module(*_): + return {"failed": True, "result": "[Errno 2] No such file or directory"} + + check = DiagnosticCheck(exec_module, task_vars) + with pytest.raises(OpenShiftCheckException) as excinfo: + check.exec_diagnostic("spam") + assert excinfo.value.name == "OcNotFound" + + +def test_module_fails(task_vars): + def exec_module(*_): + return {"failed": True, "result": "something broke"} + + check = DiagnosticCheck(exec_module, task_vars) + check.exec_diagnostic("spam") + assert check.failures and check.failures[0].name == "OcDiagFailed" + + +def test_names_executed(task_vars): + task_vars["openshift_check_diagnostics"] = diagnostics = "ConfigContexts,spam,,eggs" + + def exec_module(module, args, *_): + assert "extra_args" in args + assert args["extra_args"][0] in diagnostics + return {"result": "success"} + + DiagnosticCheck(exec_module, task_vars).run() diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py index 1a1c190f6..59c703214 100644 --- a/roles/openshift_health_checker/test/logging_check_test.py +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -98,21 +98,19 @@ def test_oc_failure(problem, expect): assert expect in str(excinfo) -groups_with_first_master = dict(masters=['this-host', 'other-host']) -groups_with_second_master = dict(masters=['other-host', 'this-host']) -groups_not_a_master = dict(masters=['other-host']) +groups_with_first_master = dict(oo_first_master=['this-host']) +groups_not_a_master = dict(oo_first_master=['other-host'], oo_masters=['other-host']) @pytest.mark.parametrize('groups, logging_deployed, is_active', [ (groups_with_first_master, True, True), (groups_with_first_master, False, False), (groups_not_a_master, True, False), - (groups_with_second_master, True, False), (groups_not_a_master, True, False), ]) def test_is_active(groups, logging_deployed, is_active): task_vars = dict( - ansible_ssh_host='this-host', + ansible_host='this-host', groups=groups, openshift_hosted_logging_deploy=logging_deployed, ) |