diff options
7 files changed, 138 insertions, 20 deletions
diff --git a/roles/openshift_health_checker/library/ocutil.py b/roles/openshift_health_checker/library/ocutil.py index 2e60735d6..c72f4c5b3 100644 --- a/roles/openshift_health_checker/library/ocutil.py +++ b/roles/openshift_health_checker/library/ocutil.py @@ -40,18 +40,17 @@ def main():      module = AnsibleModule(          argument_spec=dict( -            namespace=dict(type="str", required=True), +            namespace=dict(type="str", required=False),              config_file=dict(type="str", required=True),              cmd=dict(type="str", required=True),              extra_args=dict(type="list", default=[]),          ),      ) -    cmd = [ -        locate_oc_binary(), -        '--config', module.params["config_file"], -        '-n', module.params["namespace"], -    ] + shlex.split(module.params["cmd"]) +    cmd = [locate_oc_binary(), '--config', module.params["config_file"]] +    if module.params["namespace"]: +        cmd += ['-n', module.params["namespace"]] +    cmd += shlex.split(module.params["cmd"]) + module.params["extra_args"]      failed = True      try: diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 28cb53cc5..ce05b44a4 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -13,6 +13,7 @@ from importlib import import_module  from ansible.module_utils import six  from ansible.module_utils.six.moves import reduce  # pylint: disable=import-error,redefined-builtin +from ansible.module_utils.six import string_types  from ansible.plugins.filter.core import to_bool as ansible_to_bool @@ -110,6 +111,11 @@ class OpenShiftCheck(object):          """Returns true if this check applies to the ansible-playbook run."""          return True +    def is_first_master(self): +        """Determine if running on first master. Returns: bool""" +        masters = self.get_var("groups", "oo_first_master", default=None) or [None] +        return masters[0] == self.get_var("ansible_host") +      @abstractmethod      def run(self):          """Executes a check against a host and returns a result hash similar to Ansible modules. @@ -283,6 +289,17 @@ class OpenShiftCheck(object):                  ))      @staticmethod +    def normalize(name_list): +        """Return a clean list of names. + +        The input may be a comma-separated string or a sequence. Leading and +        trailing whitespace characters are removed. Empty items are discarded. +        """ +        if isinstance(name_list, string_types): +            name_list = name_list.split(',') +        return [name.strip() for name in name_list if name.strip()] + +    @staticmethod      def get_major_minor_version(openshift_image_tag):          """Parse and return the deployed version of OpenShift as a tuple."""          if openshift_image_tag and openshift_image_tag[0] == 'v': diff --git a/roles/openshift_health_checker/openshift_checks/diagnostics.py b/roles/openshift_health_checker/openshift_checks/diagnostics.py new file mode 100644 index 000000000..1cfdc1129 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/diagnostics.py @@ -0,0 +1,62 @@ +""" +A check to run relevant diagnostics via `oc adm diagnostics`. +""" + +import os + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException + + +DIAGNOSTIC_LIST = ( +    "AggregatedLogging ClusterRegistry ClusterRoleBindings ClusterRoles " +    "ClusterRouter DiagnosticPod NetworkCheck" +).split() + + +class DiagnosticCheck(OpenShiftCheck): +    """A check to run relevant diagnostics via `oc adm diagnostics`.""" + +    name = "diagnostics" +    tags = ["health"] + +    def is_active(self): +        return super(DiagnosticCheck, self).is_active() and self.is_first_master() + +    def run(self): +        if self.exec_diagnostic("ConfigContexts"): +            # only run the other diagnostics if that one succeeds (otherwise, all will fail) +            diagnostics = self.get_var("openshift_check_diagnostics", default=DIAGNOSTIC_LIST) +            for diagnostic in self.normalize(diagnostics): +                self.exec_diagnostic(diagnostic) +        return {} + +    def exec_diagnostic(self, diagnostic): +        """ +        Execute an 'oc adm diagnostics' command on the remote host. +        Raises OcNotFound or registers OcDiagFailed. +        Returns True on success or False on failure (non-zero rc). +        """ +        config_base = self.get_var("openshift.common.config_base") +        args = { +            "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), +            "cmd": "adm diagnostics", +            "extra_args": [diagnostic], +        } + +        result = self.execute_module("ocutil", args, save_as_name=diagnostic + ".failure.json") +        self.register_file(diagnostic + ".txt", result['result']) +        if result.get("failed"): +            if result['result'] == '[Errno 2] No such file or directory': +                raise OpenShiftCheckException( +                    "OcNotFound", +                    "This host is supposed to be a master but does not have the `oc` command where expected.\n" +                    "Has an installation been run on this host yet?" +                ) + +            self.register_failure(OpenShiftCheckException( +                'OcDiagFailed', +                'The {diag} diagnostic reported an error:\n' +                '{error}'.format(diag=diagnostic, error=result['result']) +            )) +            return False +        return True diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index e5d93ff3f..79955cb2f 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -16,7 +16,7 @@ class EtcdVolume(OpenShiftCheck):      def is_active(self):          etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] -        is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts +        is_etcd_host = self.get_var("ansible_host") in etcd_hosts          return super(EtcdVolume, self).is_active() and is_etcd_host      def run(self): diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 06bdfebf6..05ba73ca1 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -30,14 +30,6 @@ class LoggingCheck(OpenShiftCheck):          logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False)          return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master() -    def is_first_master(self): -        """Determine if running on first master. Returns: bool""" -        # Note: It would be nice to use membership in oo_first_master group, however for now it -        # seems best to avoid requiring that setup and just check this is the first master. -        hostname = self.get_var("ansible_ssh_host") or [None] -        masters = self.get_var("groups", "masters", default=None) or [None] -        return masters[0] == hostname -      def run(self):          return {} diff --git a/roles/openshift_health_checker/test/diagnostics_test.py b/roles/openshift_health_checker/test/diagnostics_test.py new file mode 100644 index 000000000..800889fa7 --- /dev/null +++ b/roles/openshift_health_checker/test/diagnostics_test.py @@ -0,0 +1,50 @@ +import pytest + +from openshift_checks.diagnostics import DiagnosticCheck, OpenShiftCheckException + + +@pytest.fixture() +def task_vars(): +    return dict( +        openshift=dict( +            common=dict(config_base="/etc/origin/") +        ) +    ) + + +def test_module_succeeds(task_vars): +    check = DiagnosticCheck(lambda *_: {"result": "success"}, task_vars) +    check.is_first_master = lambda: True +    assert check.is_active() +    check.exec_diagnostic("spam") +    assert not check.failures + + +def test_oc_not_there(task_vars): +    def exec_module(*_): +        return {"failed": True, "result": "[Errno 2] No such file or directory"} + +    check = DiagnosticCheck(exec_module, task_vars) +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.exec_diagnostic("spam") +    assert excinfo.value.name == "OcNotFound" + + +def test_module_fails(task_vars): +    def exec_module(*_): +        return {"failed": True, "result": "something broke"} + +    check = DiagnosticCheck(exec_module, task_vars) +    check.exec_diagnostic("spam") +    assert check.failures and check.failures[0].name == "OcDiagFailed" + + +def test_names_executed(task_vars): +    task_vars["openshift_check_diagnostics"] = diagnostics = "ConfigContexts,spam,,eggs" + +    def exec_module(module, args, *_): +        assert "extra_args" in args +        assert args["extra_args"][0] in diagnostics +        return {"result": "success"} + +    DiagnosticCheck(exec_module, task_vars).run() diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py index 1a1c190f6..59c703214 100644 --- a/roles/openshift_health_checker/test/logging_check_test.py +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -98,21 +98,19 @@ def test_oc_failure(problem, expect):      assert expect in str(excinfo) -groups_with_first_master = dict(masters=['this-host', 'other-host']) -groups_with_second_master = dict(masters=['other-host', 'this-host']) -groups_not_a_master = dict(masters=['other-host']) +groups_with_first_master = dict(oo_first_master=['this-host']) +groups_not_a_master = dict(oo_first_master=['other-host'], oo_masters=['other-host'])  @pytest.mark.parametrize('groups, logging_deployed, is_active', [      (groups_with_first_master, True, True),      (groups_with_first_master, False, False),      (groups_not_a_master, True, False), -    (groups_with_second_master, True, False),      (groups_not_a_master, True, False),  ])  def test_is_active(groups, logging_deployed, is_active):      task_vars = dict( -        ansible_ssh_host='this-host', +        ansible_host='this-host',          groups=groups,          openshift_hosted_logging_deploy=logging_deployed,      )  | 
