diff options
15 files changed, 215 insertions, 61 deletions
diff --git a/.redhat-ci.inventory b/.redhat-ci.inventory index 3c8296055..23bc9923c 100644 --- a/.redhat-ci.inventory +++ b/.redhat-ci.inventory @@ -9,6 +9,7 @@ ansible_python_interpreter=/usr/bin/python3 deployment_type=origin openshift_image_tag="{{ lookup('env', 'OPENSHIFT_IMAGE_TAG') }}" openshift_master_default_subdomain="{{ lookup('env', 'RHCI_ocp_node1_IP') }}.xip.io" +openshift_check_min_host_memory_gb=1.9 [masters] ocp-master diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index 8e2a52845..c06826331 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.6.84-1 ./ +3.6.85-1 ./ diff --git a/openshift-ansible.spec b/openshift-ansible.spec index 06245f55c..b27f4c1c4 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -9,7 +9,7 @@ %global __requires_exclude ^/usr/bin/ansible-playbook$ Name: openshift-ansible -Version: 3.6.84 +Version: 3.6.85 Release: 1%{?dist} Summary: Openshift and Atomic Enterprise Ansible License: ASL 2.0 @@ -280,6 +280,18 @@ Atomic OpenShift Utilities includes %changelog +* Thu May 25 2017 Jenkins CD Merge Bot <tdawson@redhat.com> 3.6.85-1 +- Prepending v to registry-console version (ewolinet@redhat.com) +- memory health check: adjust threshold for etcd (lmeyer@redhat.com) +- health checks: specify check skip reason (lmeyer@redhat.com) +- health checks: configure failure output in playbooks (lmeyer@redhat.com) +- disk/memory checks: make threshold configurable (lmeyer@redhat.com) +- Show help on how to disable checks after failure (rhcarvalho@gmail.com) +- Allow disabling checks via Ansible variable (rhcarvalho@gmail.com) +- Verify memory and disk requirements before install (rhcarvalho@gmail.com) +- filter_plugins: Allow for multiple pairs in map_from_pairs() + (jarrpa@redhat.com) + * Wed May 24 2017 Jenkins CD Merge Bot <tdawson@redhat.com> 3.6.84-1 - oc_process: Better error output on failed template() call (jarrpa@redhat.com) diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index acf5469bf..fd4a9eb26 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -3,6 +3,19 @@ tags: - always +- name: Verify Requirements + hosts: OSEv3 + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "install" + post_tasks: + - action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability + - include: ../../common/openshift-cluster/std_include.yml tags: - always diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml index fc0f523d5..1bee460e8 100644 --- a/playbooks/common/openshift-checks/health.yml +++ b/playbooks/common/openshift-checks/health.yml @@ -2,9 +2,10 @@ - name: Run OpenShift health checks hosts: OSEv3 roles: - - openshift_health_checker + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "health" post_tasks: - - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 - args: - checks: - - '@health' + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@health'] diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml index c8ffc3d91..e01c6f38d 100644 --- a/playbooks/common/openshift-checks/pre-install.yml +++ b/playbooks/common/openshift-checks/pre-install.yml @@ -2,9 +2,10 @@ - hosts: OSEv3 name: run OpenShift pre-install checks roles: - - openshift_health_checker + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "pre-install" post_tasks: - - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 - args: - checks: - - '@preflight' + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@preflight'] diff --git a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml index 9b76f1dd0..4cf434dab 100644 --- a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml @@ -10,7 +10,7 @@ router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', openshift_image_tag ) }}" registry_console_image: "{{ openshift.master.registry_url | replace ( '${component}', 'registry-console') | - replace ( '${version}', openshift.common.short_version ) }}" + replace ( '${version}', 'v' ~ openshift.common.short_version ) }}" pre_tasks: - name: Load lib_openshift modules diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 03c40b78b..a62e4331e 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -25,9 +25,11 @@ class ActionModule(ActionBase): def run(self, tmp=None, task_vars=None): result = super(ActionModule, self).run(tmp, task_vars) + task_vars = task_vars or {} - if task_vars is None: - task_vars = {} + # vars are not supportably available in the callback plugin, + # so record any it will need in the result. + result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context') if "openshift" not in task_vars: result["failed"] = True @@ -46,19 +48,27 @@ class ActionModule(ActionBase): result["checks"] = check_results = {} + user_disabled_checks = [ + check.strip() + for check in task_vars.get("openshift_disable_check", "").split(",") + ] + for check_name in resolved_checks: display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"])) check = known_checks[check_name] - if check.is_active(task_vars): + if not check.is_active(task_vars): + r = dict(skipped=True, skipped_reason="Not active for this host") + elif check_name in user_disabled_checks: + r = dict(skipped=True, skipped_reason="Disabled by user request") + else: try: r = check.run(tmp, task_vars) except OpenShiftCheckException as e: - r = {} - r["failed"] = True - r["msg"] = str(e) - else: - r = {"skipped": True} + r = dict( + failed=True, + msg=str(e), + ) check_results[check_name] = r diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 7bce7f107..64c29a8d9 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -2,6 +2,12 @@ Ansible callback plugin. ''' +# Reason: In several locations below we disable pylint protected-access +# for Ansible objects that do not give us any public way +# to access the full details we need to report check failures. +# Status: disabled permanently or until Ansible object has a public API. +# This does leave the code more likely to be broken by future Ansible changes. + from pprint import pformat from ansible.plugins.callback import CallbackBase @@ -20,38 +26,37 @@ class CallbackModule(CallbackBase): CALLBACK_TYPE = 'aggregate' CALLBACK_NAME = 'failure_summary' CALLBACK_NEEDS_WHITELIST = False + _playbook_file = None def __init__(self): super(CallbackModule, self).__init__() self.__failures = [] + def v2_playbook_on_start(self, playbook): + super(CallbackModule, self).v2_playbook_on_start(playbook) + # re: playbook attrs see top comment # pylint: disable=protected-access + self._playbook_file = playbook._file_name + def v2_runner_on_failed(self, result, ignore_errors=False): super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors) self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) def v2_playbook_on_stats(self, stats): super(CallbackModule, self).v2_playbook_on_stats(stats) - # TODO: update condition to consider a host var or env var to - # enable/disable the summary, so that we can control the output from a - # play. if self.__failures: - self._print_failure_summary() + self._print_failure_details(self.__failures) - def _print_failure_summary(self): - '''Print a summary of failed tasks (including ignored failures).''' + def _print_failure_details(self, failures): + '''Print a summary of failed tasks or checks.''' self._display.display(u'\nFailure summary:\n') - # TODO: group failures by host or by task. If grouped by host, it is - # easy to see all problems of a given host. If grouped by task, it is - # easy to see what hosts needs the same fix. - - width = len(str(len(self.__failures))) + width = len(str(len(failures))) initial_indent_format = u' {{:>{width}}}. '.format(width=width) initial_indent_len = len(initial_indent_format.format(0)) subsequent_indent = u' ' * initial_indent_len subsequent_extra_indent = u' ' * (initial_indent_len + 10) - for i, failure in enumerate(self.__failures, 1): + for i, failure in enumerate(failures, 1): entries = _format_failure(failure) self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), entries[0])) for entry in entries[1:]: @@ -59,11 +64,52 @@ class CallbackModule(CallbackBase): indented = u'{}{}'.format(subsequent_indent, entry) self._display.display(indented) - -# Reason: disable pylint protected-access because we need to access _* -# attributes of a task result to implement this method. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access + failed_checks = set() + playbook_context = None + # re: result attrs see top comment # pylint: disable=protected-access + for failure in failures: + # get context from check task result since callback plugins cannot access task vars + playbook_context = playbook_context or failure['result']._result.get('playbook_context') + failed_checks.update( + name + for name, result in failure['result']._result.get('checks', {}).items() + if result.get('failed') + ) + if failed_checks: + self._print_check_failure_summary(failed_checks, playbook_context) + + def _print_check_failure_summary(self, failed_checks, context): + checks = ','.join(sorted(failed_checks)) + # NOTE: context is not set if all failures occurred prior to checks task + summary = ( + '\n' + 'The execution of "{playbook}"\n' + 'includes checks designed to fail early if the requirements\n' + 'of the playbook are not met. One or more of these checks\n' + 'failed. To disregard these results, you may choose to\n' + 'disable failing checks by setting an Ansible variable:\n\n' + ' openshift_disable_check={checks}\n\n' + 'Failing check names are shown in the failure details above.\n' + 'Some checks may be configurable by variables if your requirements\n' + 'are different from the defaults; consult check documentation.\n' + 'Variables can be set in the inventory or passed on the\n' + 'command line using the -e flag to ansible-playbook.\n' + ).format(playbook=self._playbook_file, checks=checks) + if context in ['pre-install', 'health']: + summary = ( + '\n' + 'You may choose to configure or disable failing checks by\n' + 'setting Ansible variables. To disable those above:\n\n' + ' openshift_disable_check={checks}\n\n' + 'Consult check documentation for configurable variables.\n' + 'Variables can be set in the inventory or passed on the\n' + 'command line using the -e flag to ansible-playbook.\n' + ).format(checks=checks) + # other expected contexts: install, upgrade + self._display.display(summary) + + +# re: result attrs see top comment # pylint: disable=protected-access def _format_failure(failure): '''Return a list of pretty-formatted text entries describing a failure, including relevant information about it. Expect that the list of text entries will be joined @@ -100,11 +146,8 @@ def _format_failed_checks(checks): return stringc(pformat(checks), C.COLOR_ERROR) -# Reason: disable pylint protected-access because we need to access _* -# attributes of obj to implement this function. -# This is inspired by ansible.playbook.base.Base.dump_me. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access +# This is inspired by ansible.playbook.base.Base.dump_me. +# re: play/task/block attrs see top comment # pylint: disable=protected-access def _get_play(obj): '''Given a task or block, recursively tries to find its parent play.''' if hasattr(obj, '_play'): diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index c2792a0fe..962148cb8 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -27,10 +27,12 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): group_names = get_var(task_vars, "group_names") ansible_mounts = get_var(task_vars, "ansible_mounts") - - min_free_bytes = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) free_bytes = self.openshift_available_disk(ansible_mounts) + recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) + configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9 + min_free_bytes = configured_min or recommended_min + if free_bytes < min_free_bytes: return { 'failed': True, diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py index 28805dc37..8b1a58ef4 100644 --- a/roles/openshift_health_checker/openshift_checks/memory_availability.py +++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py @@ -13,7 +13,7 @@ class MemoryAvailability(OpenShiftCheck): recommended_memory_bytes = { "masters": 16 * 10**9, "nodes": 8 * 10**9, - "etcd": 20 * 10**9, + "etcd": 8 * 10**9, } @classmethod @@ -27,7 +27,9 @@ class MemoryAvailability(OpenShiftCheck): group_names = get_var(task_vars, "group_names") total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * 10**6 - min_memory_bytes = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) + recommended_min = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) + configured_min = int(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * 10**9 + min_memory_bytes = configured_min or recommended_min if total_memory_bytes < min_memory_bytes: return { diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 2693ae37b..6ebf0ebb2 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -67,6 +67,7 @@ def changed(result): return result.get('changed', False) +# tests whether task is skipped, not individual checks def skipped(result): return result.get('skipped', False) @@ -101,7 +102,20 @@ def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch): result = plugin.run(tmp=None, task_vars=task_vars) - assert result['checks']['fake_check'] == {'skipped': True} + assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Not active for this host") + assert not failed(result) + assert not changed(result) + assert not skipped(result) + + +def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch): + checks = [fake_check('fake_check', is_active=True)] + monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks)) + + task_vars['openshift_disable_check'] = 'fake_check' + result = plugin.run(tmp=None, task_vars=task_vars) + + assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request") assert not failed(result) assert not changed(result) assert not skipped(result) diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index 970b474d7..b353fa610 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -42,9 +42,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): assert word in str(excinfo.value) -@pytest.mark.parametrize('group_names,ansible_mounts', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [ ( ['masters'], + 0, [{ 'mount': '/', 'size_available': 40 * 10**9 + 1, @@ -52,6 +53,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): ), ( ['nodes'], + 0, [{ 'mount': '/', 'size_available': 15 * 10**9 + 1, @@ -59,6 +61,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): ), ( ['etcd'], + 0, [{ 'mount': '/', 'size_available': 20 * 10**9 + 1, @@ -66,6 +69,15 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): ), ( ['etcd'], + 1, # configure lower threshold + [{ + 'mount': '/', + 'size_available': 1 * 10**9 + 1, # way smaller than recommended + }], + ), + ( + ['etcd'], + 0, [{ # not enough space on / ... 'mount': '/', @@ -77,9 +89,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): }], ), ]) -def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): +def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansible_mounts): task_vars = dict( group_names=group_names, + openshift_check_min_host_disk_gb=configured_min, ansible_mounts=ansible_mounts, ) @@ -89,9 +102,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,ansible_mounts,extra_words', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [ ( ['masters'], + 0, [{ 'mount': '/', 'size_available': 1, @@ -99,7 +113,17 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): ['0.0 GB'], ), ( + ['masters'], + 100, # set a higher threshold + [{ + 'mount': '/', + 'size_available': 50 * 10**9, # would normally be enough... + }], + ['100.0 GB'], + ), + ( ['nodes'], + 0, [{ 'mount': '/', 'size_available': 1 * 10**9, @@ -108,6 +132,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): ), ( ['etcd'], + 0, [{ 'mount': '/', 'size_available': 1, @@ -116,6 +141,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): ), ( ['nodes', 'masters'], + 0, [{ 'mount': '/', # enough space for a node, not enough for a master @@ -125,6 +151,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): ), ( ['etcd'], + 0, [{ # enough space on / ... 'mount': '/', @@ -137,9 +164,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): ['0.0 GB'], ), ]) -def test_fails_with_insufficient_disk_space(group_names, ansible_mounts, extra_words): +def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words): task_vars = dict( group_names=group_names, + openshift_check_min_host_disk_gb=configured_min, ansible_mounts=ansible_mounts, ) diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py index e161a5b9e..1db203854 100644 --- a/roles/openshift_health_checker/test/memory_availability_test.py +++ b/roles/openshift_health_checker/test/memory_availability_test.py @@ -20,27 +20,37 @@ def test_is_active(group_names, is_active): assert MemoryAvailability.is_active(task_vars=task_vars) == is_active -@pytest.mark.parametrize('group_names,ansible_memtotal_mb', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb', [ ( ['masters'], + 0, 17200, ), ( ['nodes'], + 0, 8200, ), ( + ['nodes'], + 1, # configure lower threshold + 2000, # too low for recommended but not for configured + ), + ( ['etcd'], - 22200, + 0, + 8200, ), ( ['masters', 'nodes'], + 0, 17000, ), ]) -def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb): +def test_succeeds_with_recommended_memory(group_names, configured_min, ansible_memtotal_mb): task_vars = dict( group_names=group_names, + openshift_check_min_host_memory_gb=configured_min, ansible_memtotal_mb=ansible_memtotal_mb, ) @@ -50,39 +60,56 @@ def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb): assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,ansible_memtotal_mb,extra_words', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb,extra_words', [ ( ['masters'], 0, + 0, ['0.0 GB'], ), ( ['nodes'], + 0, 100, ['0.1 GB'], ), ( + ['nodes'], + 24, # configure higher threshold + 20000, # enough to meet recommended but not configured + ['20.0 GB'], + ), + ( ['etcd'], - -1, - ['0.0 GB'], + 0, + 7000, + ['7.0 GB'], + ), + ( + ['etcd', 'masters'], + 0, + 9000, # enough memory for etcd, not enough for a master + ['9.0 GB'], ), ( ['nodes', 'masters'], + 0, # enough memory for a node, not enough for a master 11000, ['11.0 GB'], ), ]) -def test_fails_with_insufficient_memory(group_names, ansible_memtotal_mb, extra_words): +def test_fails_with_insufficient_memory(group_names, configured_min, ansible_memtotal_mb, extra_words): task_vars = dict( group_names=group_names, + openshift_check_min_host_memory_gb=configured_min, ansible_memtotal_mb=ansible_memtotal_mb, ) check = MemoryAvailability(execute_module=fake_execute_module) result = check.run(tmp=None, task_vars=task_vars) - assert result['failed'] + assert result.get('failed', False) for word in 'below recommended'.split() + extra_words: assert word in result['msg'] diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 9af10a849..5d8506a73 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -1,5 +1,5 @@ --- -- local_action: shell rpm -q python-passlib || echo not installed +- local_action: shell python -c 'import passlib' 2>/dev/null || echo not installed register: passlib_result - name: Check that python-passlib is available on the control host |