Handle more exceptions when running checks

This prevents an exception in one check from interfering with other checks. Skips checks that raise an exception in their is_active method. Whenever capturing a broad exception in the `is_active` or `run` methods, include traceback information that can be useful in bug reports.
author: Rodolfo Carvalho <rhcarvalho@gmail.com> 2017-08-03 10:40:08 +0200
committer: Rodolfo Carvalho <rhcarvalho@gmail.com> 2017-08-24 15:00:16 +0200
commit: 75b1ef8fa6e80e7645a60cef2d4e7640a6c87955 (patch)
tree: c6de01ea22ccdbf58524887c59c4a57cfc3e80aa /roles/openshift_health_checker
parent: a28796fc669bd40f9384118f278b62001a15214d (diff)
download: openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.gz
openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.bz2
openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.xz
openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.zip
1 files changed, 30 insertions, 19 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index 3e8962c3c..623c3eb8f 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -4,6 +4,7 @@ Ansible action plugin to execute health checks in OpenShift clusters.
 # pylint: disable=wrong-import-position,missing-docstring,invalid-name
 import sys
 import os
+import traceback
 from collections import defaultdict
 
 try:
@@ -58,26 +59,12 @@ class ActionModule(ActionBase):
 
         user_disabled_checks = normalize(task_vars.get('openshift_disable_check', []))
 
-        for check_name in resolved_checks:
-            display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))
-            check = known_checks[check_name]
-
-            if not check.is_active():
-                r = dict(skipped=True, skipped_reason="Not active for this host")
-            elif check_name in user_disabled_checks:
-                r = dict(skipped=True, skipped_reason="Disabled by user request")
-            else:
-                try:
-                    r = check.run()
-                except OpenShiftCheckException as e:
-                    r = dict(
-                        failed=True,
-                        msg=str(e),
-                    )
-
+        for name in resolved_checks:
+            display.banner("CHECK [{} : {}]".format(name, task_vars["ansible_host"]))
+            check = known_checks[name]
+            check_results[name] = run_check(name, check, user_disabled_checks)
             if check.changed:
-                r["changed"] = True
-            check_results[check_name] = r
+                check_results[name]["changed"] = True
 
         result["changed"] = any(r.get("changed") for r in check_results.values())
         if any(r.get("failed") for r in check_results.values()):
@@ -192,3 +179,27 @@ def normalize(checks):
     if isinstance(checks, string_types):
         checks = checks.split(',')
     return [name.strip() for name in checks if name.strip()]
+
+
+def run_check(name, check, user_disabled_checks):
+    """Run a single check if enabled and return a result dict."""
+    if name in user_disabled_checks:
+        return dict(skipped=True, skipped_reason="Disabled by user request")
+
+    # pylint: disable=broad-except; capturing exceptions broadly is intentional,
+    # to isolate arbitrary failures in one check from others.
+    try:
+        is_active = check.is_active()
+    except Exception as exc:
+        reason = "Could not determine if check should be run, exception: {}".format(exc)
+        return dict(skipped=True, skipped_reason=reason, exception=traceback.format_exc())
+
+    if not is_active:
+        return dict(skipped=True, skipped_reason="Not active for this host")
+
+    try:
+        return check.run()
+    except OpenShiftCheckException as exc:
+        return dict(failed=True, msg=str(exc))
+    except Exception as exc:
+        return dict(failed=True, msg=str(exc), exception=traceback.format_exc())
author	Rodolfo Carvalho <rhcarvalho@gmail.com>	2017-08-03 10:40:08 +0200
committer	Rodolfo Carvalho <rhcarvalho@gmail.com>	2017-08-24 15:00:16 +0200
commit	75b1ef8fa6e80e7645a60cef2d4e7640a6c87955 (patch)
tree	c6de01ea22ccdbf58524887c59c4a57cfc3e80aa /roles/openshift_health_checker
parent	a28796fc669bd40f9384118f278b62001a15214d (diff)
download	openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.gz openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.bz2 openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.tar.xz openshift-75b1ef8fa6e80e7645a60cef2d4e7640a6c87955.zip