diff options
Diffstat (limited to 'roles')
21 files changed, 424 insertions, 206 deletions
diff --git a/roles/openshift_health_checker/HOWTO_CHECKS.md b/roles/openshift_health_checker/HOWTO_CHECKS.md new file mode 100644 index 000000000..1573c14da --- /dev/null +++ b/roles/openshift_health_checker/HOWTO_CHECKS.md @@ -0,0 +1,34 @@ +# OpenShift health checks + +This Ansible role contains health checks to diagnose problems in OpenShift +environments. + +Checks are typically implemented as two parts: + +1. a Python module in [openshift_checks/](openshift_checks), with a class that + inherits from `OpenShiftCheck`. +2. a custom Ansible module in [library/](library), for cases when the modules + shipped with Ansible do not provide the required functionality. + +The checks are called from an Ansible playbooks via the `openshift_health_check` +action plugin. See +[playbooks/byo/openshift-preflight/check.yml](../../playbooks/byo/openshift-preflight/check.yml) +for an example. + +The action plugin dynamically discovers all checks and executes only those +selected in the play. + +Checks can determine when they are active by implementing the method +`is_active`. Inactive checks are skipped. This is similar to the `when` +instruction in Ansible plays. + +Checks may have tags, which are a way to group related checks together. For +instance, to run all preflight checks, pass in the group `'@preflight'` to +`openshift_health_check`. + +Groups are automatically computed from tags. + +Groups and individual check names can be used together in the argument list to +`openshift_health_check`. + +Look at existing checks for the implementation details. diff --git a/roles/openshift_health_checker/README.md b/roles/openshift_health_checker/README.md new file mode 100644 index 000000000..4ab5f1f7b --- /dev/null +++ b/roles/openshift_health_checker/README.md @@ -0,0 +1,45 @@ +OpenShift Health Checker +======================== + +This role detects common problems with OpenShift installations or with +environments prior to install. + +For more information about creating new checks, see [HOWTO_CHECKS.md](HOWTO_CHECKS.md). + +Requirements +------------ + +* Ansible 2.2+ + +Role Variables +-------------- + +None + +Dependencies +------------ + +- openshift_facts + +Example Playbook +---------------- + +```yaml +--- +- hosts: OSEv3 + name: run OpenShift health checks + roles: + - openshift_health_checker + post_tasks: + - action: openshift_health_check +``` + +License +------- + +Apache License Version 2.0 + +Author Information +------------------ + +Customer Success team (dev@lists.openshift.redhat.com) diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py new file mode 100644 index 000000000..0411797b1 --- /dev/null +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -0,0 +1,116 @@ +""" +Ansible action plugin to execute health checks in OpenShift clusters. +""" +# pylint: disable=wrong-import-position,missing-docstring,invalid-name +import sys +import os + +try: + from __main__ import display +except ImportError: + from ansible.utils.display import Display + display = Display() + +from ansible.plugins.action import ActionBase + +# Augment sys.path so that we can import checks from a directory relative to +# this callback plugin. +sys.path.insert(1, os.path.dirname(os.path.dirname(__file__))) + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException # noqa: E402 + + +class ActionModule(ActionBase): + + def run(self, tmp=None, task_vars=None): + result = super(ActionModule, self).run(tmp, task_vars) + + if task_vars is None: + task_vars = {} + + if "openshift" not in task_vars: + result["failed"] = True + result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?" + return result + + try: + known_checks = self.load_known_checks() + except OpenShiftCheckException as e: + result["failed"] = True + result["msg"] = str(e) + return result + + args = self._task.args + requested_checks = resolve_checks(args.get("checks", []), known_checks.values()) + + unknown_checks = requested_checks - set(known_checks) + if unknown_checks: + result["failed"] = True + result["msg"] = ( + "One or more checks are unknown: {}. " + "Make sure there is no typo in the playbook and no files are missing." + ).format(", ".join(unknown_checks)) + return result + + result["checks"] = check_results = {} + + for check_name in requested_checks & set(known_checks): + display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"])) + check = known_checks[check_name] + + if check.is_active(task_vars): + try: + r = check.run(tmp, task_vars) + except OpenShiftCheckException as e: + r = {} + r["failed"] = True + r["msg"] = str(e) + else: + r = {"skipped": True} + + check_results[check_name] = r + + if r.get("failed", False): + result["failed"] = True + result["msg"] = "One or more checks failed" + + return result + + def load_known_checks(self): + known_checks = {} + + known_check_classes = set(cls for cls in OpenShiftCheck.subclasses()) + + for cls in known_check_classes: + check_name = cls.name + if check_name in known_checks: + other_cls = known_checks[check_name].__class__ + raise OpenShiftCheckException( + "non-unique check name '{}' in: '{}.{}' and '{}.{}'".format( + check_name, + cls.__module__, cls.__name__, + other_cls.__module__, other_cls.__name__)) + known_checks[check_name] = cls(module_executor=self._execute_module) + + return known_checks + + +def resolve_checks(names, all_checks): + """Returns a set of resolved check names. + + Resolving a check name involves expanding tag references (e.g., '@tag') with + all the checks that contain the given tag. + + names should be a sequence of strings. + + all_checks should be a sequence of check classes/instances. + """ + resolved = set() + for name in names: + if name.startswith("@"): + for check in all_checks: + if name[1:] in check.tags: + resolved.add(check.name) + else: + resolved.add(name) + return resolved diff --git a/roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 180ed8d8f..8caefab15 100644 --- a/roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -3,6 +3,8 @@ Ansible callback plugin. ''' +from pprint import pformat + from ansible.plugins.callback import CallbackBase from ansible import constants as C from ansible.utils.color import stringc @@ -79,6 +81,8 @@ def _format_failure(failure): (u'Task', task), (u'Message', stringc(msg, C.COLOR_ERROR)), ) + if 'checks' in result._result: + rows += ((u'Details', stringc(pformat(result._result['checks']), C.COLOR_ERROR)),) row_format = '{:10}{}' return [row_format.format(header + u':', body) for header, body in rows] diff --git a/roles/openshift_preflight/base/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index f7fcb6da5..13b7d310b 100755 --- a/roles/openshift_preflight/base/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -1,57 +1,49 @@ #!/usr/bin/python # vim: expandtab:tabstop=4:shiftwidth=4 ''' -An ansible module for determining if more than one minor version -of any atomic-openshift package is available, which would indicate -that multiple repos are enabled for different versions of the same -thing which may cause problems. +Ansible module for determining if multiple versions of an OpenShift package are +available, and if the version requested is available down to the given +precision. -Also, determine if the version requested is available down to the -precision requested. +Multiple versions available suggest that multiple repos are enabled for the +different versions, which may cause installation problems. ''' -# import os -# import sys import yum # pylint: disable=import-error + from ansible.module_utils.basic import AnsibleModule -def main(): # pylint: disable=missing-docstring +def main(): # pylint: disable=missing-docstring,too-many-branches module = AnsibleModule( argument_spec=dict( - version=dict(required=True) + prefix=dict(required=True), # atomic-openshift, origin, ... + version=dict(required=True), ), supports_check_mode=True ) - # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet - # for when it is actually necessary. Leaving it commented out for now, - # though this comment and the commented out code related to _unmute should - # be deleted later if not proven necessary. - - # sys.stdout = os.devnull # mute yum so it doesn't break our output - # sys.stderr = os.devnull # mute yum so it doesn't break our output - - # def _unmute(): # pylint: disable=missing-docstring - # sys.stdout = sys.__stdout__ - def bail(error): # pylint: disable=missing-docstring - # _unmute() module.fail_json(msg=error) + rpm_prefix = module.params['prefix'] + + if not rpm_prefix: + bail("prefix must not be empty") + yb = yum.YumBase() # pylint: disable=invalid-name # search for package versions available for aos pkgs expected_pkgs = [ - 'atomic-openshift', - 'atomic-openshift-master', - 'atomic-openshift-node', + rpm_prefix, + rpm_prefix + '-master', + rpm_prefix + '-node', ] try: pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs) except yum.Errors.PackageSackError as e: # pylint: disable=invalid-name # you only hit this if *none* of the packages are available - bail('Unable to find any atomic-openshift packages. \nCheck your subscription and repo settings. \n%s' % e) + bail('Unable to find any OpenShift packages.\nCheck your subscription and repo settings.\n%s' % e) # determine what level of precision we're expecting for the version expected_version = module.params['version'] @@ -92,7 +84,6 @@ def main(): # pylint: disable=missing-docstring msg += ' %s\n' % name bail(msg + "There should only be one OpenShift version's repository enabled at a time.") - # _unmute() module.exit_json(changed=False) diff --git a/roles/openshift_preflight/base/library/check_yum_update.py b/roles/openshift_health_checker/library/check_yum_update.py index 296ebd44f..9bc14fd47 100755 --- a/roles/openshift_preflight/base/library/check_yum_update.py +++ b/roles/openshift_health_checker/library/check_yum_update.py @@ -8,9 +8,10 @@ parameters: If omitted, all installed RPMs are considered for updates. ''' -# import os import sys + import yum # pylint: disable=import-error + from ansible.module_utils.basic import AnsibleModule @@ -22,18 +23,7 @@ def main(): # pylint: disable=missing-docstring,too-many-branches supports_check_mode=True ) - # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet - # for when it is actually necessary. Leaving it commented out for now, - # though this comment and the commented out code related to _unmute should - # be deleted later if not proven necessary. - - # sys.stdout = os.devnull # mute yum so it doesn't break our output - - # def _unmute(): # pylint: disable=missing-docstring - # sys.stdout = sys.__stdout__ - def bail(error): # pylint: disable=missing-docstring - # _unmute() module.fail_json(msg=error) yb = yum.YumBase() # pylint: disable=invalid-name @@ -108,7 +98,6 @@ def main(): # pylint: disable=missing-docstring,too-many-branches bail('Unknown error(s) from dependency resolution. Exit Code: %d:\n%s' % (txn_result, txn_msgs)) - # _unmute() module.exit_json(changed=False) diff --git a/roles/openshift_preflight/init/meta/main.yml b/roles/openshift_health_checker/meta/main.yml index 0bbeadd34..0bbeadd34 100644 --- a/roles/openshift_preflight/init/meta/main.yml +++ b/roles/openshift_health_checker/meta/main.yml diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py new file mode 100644 index 000000000..c31242624 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -0,0 +1,84 @@ +""" +Health checks for OpenShift clusters. +""" + +import os +from abc import ABCMeta, abstractmethod, abstractproperty +from importlib import import_module +import operator + +import six +from six.moves import reduce + + +class OpenShiftCheckException(Exception): + """Raised when a check cannot proceed.""" + pass + + +@six.add_metaclass(ABCMeta) +class OpenShiftCheck(object): + """A base class for defining checks for an OpenShift cluster environment.""" + + def __init__(self, module_executor): + self.module_executor = module_executor + + @abstractproperty + def name(self): + """The name of this check, usually derived from the class name.""" + return "openshift_check" + + @property + def tags(self): + """A list of tags that this check satisfy. + + Tags are used to reference multiple checks with a single '@tagname' + special check name. + """ + return [] + + @classmethod + def is_active(cls, task_vars): # pylint: disable=unused-argument + """Returns true if this check applies to the ansible-playbook run.""" + return True + + @abstractmethod + def run(self, tmp, task_vars): + """Executes a check, normally implemented as a module.""" + return {} + + @classmethod + def subclasses(cls): + """Returns a generator of subclasses of this class and its subclasses.""" + for subclass in cls.__subclasses__(): # pylint: disable=no-member + yield subclass + for subclass in subclass.subclasses(): + yield subclass + + +def get_var(task_vars, *keys, **kwargs): + """Helper function to get deeply nested values from task_vars. + + Ansible task_vars structures are Python dicts, often mapping strings to + other dicts. This helper makes it easier to get a nested value, raising + OpenShiftCheckException when a key is not found. + """ + try: + value = reduce(operator.getitem, keys, task_vars) + except (KeyError, TypeError): + if "default" in kwargs: + return kwargs["default"] + raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys)))) + return value + + +# Dynamically import all submodules for the side effect of loading checks. + +EXCLUDES = ( + "__init__.py", + "mixins.py", +) + +for name in os.listdir(os.path.dirname(__file__)): + if name.endswith(".py") and name not in EXCLUDES: + import_module(__package__ + "." + name[:-3]) diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py new file mode 100644 index 000000000..4029fba62 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -0,0 +1,21 @@ +# pylint: disable=missing-docstring +from openshift_checks import get_var + + +class NotContainerized(object): + """Mixin for checks that are only active when not in containerized mode.""" + + @classmethod + def is_active(cls, task_vars): + return ( + # This mixin is meant to be used with subclasses of + # OpenShiftCheck. Pylint disables this by default on mixins, + # though it relies on the class name ending in 'mixin'. + # pylint: disable=no-member + super(NotContainerized, cls).is_active(task_vars) and + not cls.is_containerized(task_vars) + ) + + @staticmethod + def is_containerized(task_vars): + return get_var(task_vars, "openshift", "common", "is_containerized") diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py new file mode 100644 index 000000000..8faeef5ee --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -0,0 +1,66 @@ +# pylint: disable=missing-docstring +from openshift_checks import OpenShiftCheck, get_var +from openshift_checks.mixins import NotContainerized + + +class PackageAvailability(NotContainerized, OpenShiftCheck): + """Check that required RPM packages are available.""" + + name = "package_availability" + tags = ["preflight"] + + def run(self, tmp, task_vars): + rpm_prefix = get_var(task_vars, "openshift", "common", "service_type") + group_names = get_var(task_vars, "group_names", default=[]) + + packages = set() + + if "masters" in group_names: + packages.update(self.master_packages(rpm_prefix)) + if "nodes" in group_names: + packages.update(self.node_packages(rpm_prefix)) + + args = {"packages": sorted(set(packages))} + return self.module_executor("check_yum_update", args, tmp, task_vars) + + @staticmethod + def master_packages(rpm_prefix): + return [ + "{rpm_prefix}".format(rpm_prefix=rpm_prefix), + "{rpm_prefix}-clients".format(rpm_prefix=rpm_prefix), + "{rpm_prefix}-master".format(rpm_prefix=rpm_prefix), + "bash-completion", + "cockpit-bridge", + "cockpit-docker", + "cockpit-kubernetes", + "cockpit-shell", + "cockpit-ws", + "etcd", + "httpd-tools", + ] + + @staticmethod + def node_packages(rpm_prefix): + return [ + "{rpm_prefix}".format(rpm_prefix=rpm_prefix), + "{rpm_prefix}-node".format(rpm_prefix=rpm_prefix), + "{rpm_prefix}-sdn-ovs".format(rpm_prefix=rpm_prefix), + "bind", + "ceph-common", + "dnsmasq", + "docker", + "firewalld", + "flannel", + "glusterfs-fuse", + "iptables-services", + "iptables", + "iscsi-initiator-utils", + "libselinux-python", + "nfs-utils", + "ntp", + "openssl", + "pyparted", + "python-httplib2", + "PyYAML", + "yum-utils", + ] diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py new file mode 100644 index 000000000..86b7b6245 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -0,0 +1,14 @@ +# pylint: disable=missing-docstring +from openshift_checks import OpenShiftCheck +from openshift_checks.mixins import NotContainerized + + +class PackageUpdate(NotContainerized, OpenShiftCheck): + """Check that there are no conflicts in RPM packages.""" + + name = "package_update" + tags = ["preflight"] + + def run(self, tmp, task_vars): + args = {"packages": []} + return self.module_executor("check_yum_update", args, tmp, task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py new file mode 100644 index 000000000..7fa09cbfd --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -0,0 +1,20 @@ +# pylint: disable=missing-docstring +from openshift_checks import OpenShiftCheck, get_var +from openshift_checks.mixins import NotContainerized + + +class PackageVersion(NotContainerized, OpenShiftCheck): + """Check that available RPM packages match the required versions.""" + + name = "package_version" + tags = ["preflight"] + + def run(self, tmp, task_vars): + rpm_prefix = get_var(task_vars, "openshift", "common", "service_type") + openshift_release = get_var(task_vars, "openshift_release") + + args = { + "prefix": rpm_prefix, + "version": openshift_release, + } + return self.module_executor("aos_version", args, tmp, task_vars) diff --git a/roles/openshift_preflight/README.md b/roles/openshift_preflight/README.md deleted file mode 100644 index b6d3542d3..000000000 --- a/roles/openshift_preflight/README.md +++ /dev/null @@ -1,52 +0,0 @@ -OpenShift Preflight Checks -========================== - -This role detects common problems prior to installing OpenShift. - -Requirements ------------- - -* Ansible 2.2+ - -Role Variables --------------- - -None - -Dependencies ------------- - -None - -Example Playbook ----------------- - -```yaml ---- -- hosts: OSEv3 - roles: - - openshift_preflight/init - -- hosts: OSEv3 - name: checks that apply to all hosts - gather_facts: no - ignore_errors: yes - roles: - - openshift_preflight/common - -- hosts: OSEv3 - name: verify check results - gather_facts: no - roles: - - openshift_preflight/verify_status -``` - -License -------- - -Apache License Version 2.0 - -Author Information ------------------- - -Customer Success team (dev@lists.openshift.redhat.com) diff --git a/roles/openshift_preflight/common/meta/main.yml b/roles/openshift_preflight/common/meta/main.yml deleted file mode 100644 index 6f23cbf3b..000000000 --- a/roles/openshift_preflight/common/meta/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -dependencies: - - role: openshift_preflight/base diff --git a/roles/openshift_preflight/common/tasks/main.yml b/roles/openshift_preflight/common/tasks/main.yml deleted file mode 100644 index f1a4a160e..000000000 --- a/roles/openshift_preflight/common/tasks/main.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- -# check content available on all hosts -- when: not openshift.common.is_containerized | bool - block: - - - name: determine if yum update will work - action: check_yum_update - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if yum update will work'})] }}" - - - name: determine if expected version matches what is available - aos_version: - version: "{{ openshift_release }}" - when: - - deployment_type == "openshift-enterprise" - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if expected version matches what is available'})] }}" diff --git a/roles/openshift_preflight/init/tasks/main.yml b/roles/openshift_preflight/init/tasks/main.yml deleted file mode 100644 index bf2d82196..000000000 --- a/roles/openshift_preflight/init/tasks/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -- name: set common variables - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results | default([]) }}" diff --git a/roles/openshift_preflight/masters/meta/main.yml b/roles/openshift_preflight/masters/meta/main.yml deleted file mode 100644 index 6f23cbf3b..000000000 --- a/roles/openshift_preflight/masters/meta/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -dependencies: - - role: openshift_preflight/base diff --git a/roles/openshift_preflight/masters/tasks/main.yml b/roles/openshift_preflight/masters/tasks/main.yml deleted file mode 100644 index 35fb1e3ca..000000000 --- a/roles/openshift_preflight/masters/tasks/main.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- -# determine if yum install of master pkgs will work -- when: not openshift.common.is_containerized | bool - block: - - - name: main master packages availability - check_yum_update: - packages: - - "{{ openshift.common.service_type }}" - - "{{ openshift.common.service_type }}-clients" - - "{{ openshift.common.service_type }}-master" - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main master packages availability'})] }}" - - - name: other master packages availability - check_yum_update: - packages: - - etcd - - bash-completion - - cockpit-bridge - - cockpit-docker - - cockpit-kubernetes - - cockpit-shell - - cockpit-ws - - httpd-tools - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other master packages availability'})] }}" diff --git a/roles/openshift_preflight/nodes/meta/main.yml b/roles/openshift_preflight/nodes/meta/main.yml deleted file mode 100644 index 6f23cbf3b..000000000 --- a/roles/openshift_preflight/nodes/meta/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -dependencies: - - role: openshift_preflight/base diff --git a/roles/openshift_preflight/nodes/tasks/main.yml b/roles/openshift_preflight/nodes/tasks/main.yml deleted file mode 100644 index a10e69024..000000000 --- a/roles/openshift_preflight/nodes/tasks/main.yml +++ /dev/null @@ -1,41 +0,0 @@ ---- -# determine if yum install of node pkgs will work -- when: not openshift.common.is_containerized | bool - block: - - - name: main node packages availability - check_yum_update: - packages: - - "{{ openshift.common.service_type }}" - - "{{ openshift.common.service_type }}-node" - - "{{ openshift.common.service_type }}-sdn-ovs" - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main node packages availability'})] }}" - - - name: other node packages availability - check_yum_update: - packages: - - docker - - PyYAML - - firewalld - - iptables - - iptables-services - - nfs-utils - - ntp - - yum-utils - - dnsmasq - - libselinux-python - - ceph-common - - glusterfs-fuse - - iscsi-initiator-utils - - pyparted - - python-httplib2 - - openssl - - flannel - - bind - register: r - - - set_fact: - oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other node packages availability'})] }}" diff --git a/roles/openshift_preflight/verify_status/tasks/main.yml b/roles/openshift_preflight/verify_status/tasks/main.yml deleted file mode 100644 index 36ccf648a..000000000 --- a/roles/openshift_preflight/verify_status/tasks/main.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -- name: find check failures - set_fact: - oo_preflight_check_failures: "{{ oo_preflight_check_results | select('failed', 'equalto', True) | list }}" - -- name: ensure all checks succeed - action: fail - when: oo_preflight_check_failures |