summaryrefslogtreecommitdiffstats
path: root/playbooks/openshift-checks
diff options
context:
space:
mode:
Diffstat (limited to 'playbooks/openshift-checks')
-rw-r--r--playbooks/openshift-checks/README.md104
-rw-r--r--playbooks/openshift-checks/adhoc.yml25
-rw-r--r--playbooks/openshift-checks/certificate_expiry/default.yaml10
-rw-r--r--playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml40
-rw-r--r--playbooks/openshift-checks/certificate_expiry/easy-mode.yaml18
-rw-r--r--playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml12
-rw-r--r--playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml16
-rw-r--r--playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml13
-rw-r--r--playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml12
l---------playbooks/openshift-checks/certificate_expiry/roles1
-rw-r--r--playbooks/openshift-checks/health.yml4
-rw-r--r--playbooks/openshift-checks/pre-install.yml4
-rw-r--r--playbooks/openshift-checks/private/adhoc.yml13
-rw-r--r--playbooks/openshift-checks/private/health.yml13
-rw-r--r--playbooks/openshift-checks/private/install.yml51
-rw-r--r--playbooks/openshift-checks/private/pre-install.yml13
l---------playbooks/openshift-checks/private/roles1
l---------playbooks/openshift-checks/roles1
18 files changed, 351 insertions, 0 deletions
diff --git a/playbooks/openshift-checks/README.md b/playbooks/openshift-checks/README.md
new file mode 100644
index 000000000..0b7ea91ff
--- /dev/null
+++ b/playbooks/openshift-checks/README.md
@@ -0,0 +1,104 @@
+# OpenShift health checks
+
+This directory contains Ansible playbooks for detecting potential problems prior
+to an install, as well as health checks to run on existing OpenShift clusters.
+
+Ansible's default operation mode is to fail fast, on the first error. However,
+when performing checks, it is useful to gather as much information about
+problems as possible in a single run.
+
+Thus, the playbooks run a battery of checks against the inventory hosts and
+gather intermediate errors, giving a more complete diagnostic of the state of
+each host. If any check failed, the playbook run will be marked as failed.
+
+To facilitate understanding the problems that were encountered, a custom
+callback plugin summarizes execution errors at the end of a playbook run.
+
+## Available playbooks
+
+1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system
+ requirements and look for common problems that can prevent a successful
+ installation of a production cluster.
+
+2. Diagnostic playbook ([health.yml](health.yml)) - check an existing cluster
+ for known signs of problems.
+
+3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) -
+ check that certificates in use are valid and not expiring soon.
+
+4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to
+ list existing checks.
+ See the [next section](#the-adhoc-playbook) for a usage example.
+
+## Running
+
+With a [recent installation of Ansible](../../../README.md#setup), run the playbook
+against your inventory file. Here is the step-by-step:
+
+1. If you haven't done it yet, clone this repository:
+
+ ```console
+ $ git clone https://github.com/openshift/openshift-ansible
+ $ cd openshift-ansible
+ ```
+
+2. Install the [dependencies](../../../README.md#setup)
+
+3. Run the appropriate playbook:
+
+ ```console
+ $ ansible-playbook -i <inventory file> playbooks/openshift-checks/pre-install.yml
+ ```
+
+ or
+
+ ```console
+ $ ansible-playbook -i <inventory file> playbooks/openshift-checks/health.yml
+ ```
+
+ or
+
+ ```console
+ $ ansible-playbook -i <inventory file> playbooks/openshift-checks/certificate_expiry/default.yaml -v
+ ```
+
+### The adhoc playbook
+
+The adhoc playbook gives flexibility to run any check or a custom group of
+checks. What will be run is determined by the `openshift_checks` variable,
+which, among other ways supported by Ansible, can be set on the command line
+using the `-e` flag.
+
+For example, to run the `docker_storage` check:
+
+```console
+$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage
+```
+
+To run more checks, use a comma-separated list of check names:
+
+```console
+$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage,disk_availability
+```
+
+To run an entire class of checks, use the name of a check group tag, prefixed by `@`. This will run all checks tagged `preflight`:
+
+```console
+$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=@preflight
+```
+
+It is valid to specify multiple check tags and individual check names together
+in a comma-separated list.
+
+To list all of the available checks and tags, run the adhoc playbook without
+setting the `openshift_checks` variable:
+
+```console
+$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml
+```
+
+## Running in a container
+
+This repository is built into a Docker image including Ansible so that it can
+be run anywhere Docker is available, without the need to manually install dependencies.
+Instructions for doing so may be found [in the README](../../../README_CONTAINER_IMAGE.md).
diff --git a/playbooks/openshift-checks/adhoc.yml b/playbooks/openshift-checks/adhoc.yml
new file mode 100644
index 000000000..414090733
--- /dev/null
+++ b/playbooks/openshift-checks/adhoc.yml
@@ -0,0 +1,25 @@
+---
+# NOTE: ideally this would be just part of a single play in
+# private/adhoc.yml that lists the existing checks when
+# openshift_checks is not set or run the requested checks. However, to actually
+# run the checks we need to have the included dependencies to run first and that
+# takes time. To speed up listing checks, we use this separate play that runs
+# before the include of dependencies to save time and improve the UX.
+- name: OpenShift health checks
+ # NOTE: though the openshift_checks variable could be potentially defined on
+ # individual hosts while not defined for localhost, we do not support that
+ # usage. Running this play only in localhost speeds up execution.
+ hosts: localhost
+ connection: local
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: adhoc
+ pre_tasks:
+ - name: List known health checks
+ action: openshift_health_check
+ when: openshift_checks is undefined or not openshift_checks
+
+- import_playbook: ../init/main.yml
+
+- import_playbook: private/adhoc.yml
diff --git a/playbooks/openshift-checks/certificate_expiry/default.yaml b/playbooks/openshift-checks/certificate_expiry/default.yaml
new file mode 100644
index 000000000..630135cae
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/default.yaml
@@ -0,0 +1,10 @@
+---
+# Default behavior, you will need to ensure you run ansible with the
+# -v option to see report results:
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml
new file mode 100644
index 000000000..378d1f154
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml
@@ -0,0 +1,40 @@
+# This example generates HTML and JSON reports and
+#
+# Copies of the generated HTML and JSON reports are uploaded to the masters,
+# which is particularly useful when this playbook is run from a container.
+#
+# All certificates (healthy or not) are included in the results
+#
+# Optional environment variables to alter the behaviour of the playbook:
+# CERT_EXPIRY_WARN_DAYS: Length of the warning window in days (45)
+# COPY_TO_PATH: path to copy reports to in the masters (/etc/origin/certificate_expiration_report)
+---
+- name: Generate certificate expiration reports
+ hosts: nodes:masters:etcd
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_save_json_results: yes
+ openshift_certificate_expiry_generate_html_report: yes
+ openshift_certificate_expiry_show_all: yes
+ openshift_certificate_expiry_warning_days: "{{ lookup('env', 'CERT_EXPIRY_WARN_DAYS') | default('45', true) }}"
+ roles:
+ - role: openshift_certificate_expiry
+
+- name: Upload reports to master
+ hosts: masters
+ gather_facts: no
+ vars:
+ destination_path: "{{ lookup('env', 'COPY_TO_PATH') | default('/etc/origin/certificate_expiration_report', true) }}"
+ timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}"
+ tasks:
+ - name: Ensure that the target directory exists
+ file:
+ path: "{{ destination_path }}"
+ state: directory
+ - name: Copy the reports
+ copy:
+ dest: "{{ destination_path }}/{{ timestamp }}-{{ item }}"
+ src: "/tmp/{{ item }}"
+ with_items:
+ - "cert-expiry-report.html"
+ - "cert-expiry-report.json"
diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml
new file mode 100644
index 000000000..ae41c7c14
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml
@@ -0,0 +1,18 @@
+---
+# This example playbook is great if you're just wanting to try the
+# role out.
+#
+# This example enables HTML and JSON reports
+#
+# All certificates (healthy or not) are included in the results
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_save_json_results: yes
+ openshift_certificate_expiry_generate_html_report: yes
+ openshift_certificate_expiry_show_all: yes
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml
new file mode 100644
index 000000000..d80cb6ff4
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml
@@ -0,0 +1,12 @@
+---
+# Generate HTML and JSON artifacts in their default paths:
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_generate_html_report: yes
+ openshift_certificate_expiry_save_json_results: yes
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml
new file mode 100644
index 000000000..2189455b7
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml
@@ -0,0 +1,16 @@
+---
+# Generate timestamped HTML and JSON reports in /var/lib/certcheck
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_generate_html_report: yes
+ openshift_certificate_expiry_save_json_results: yes
+ openshift_certificate_expiry_show_all: yes
+ timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}"
+ openshift_certificate_expiry_html_report_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.html"
+ openshift_certificate_expiry_json_results_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.json"
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml
new file mode 100644
index 000000000..87a0f3be4
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml
@@ -0,0 +1,13 @@
+---
+# Change the expiration warning window to 1500 days (good for testing
+# the module out) and save the results as a JSON file:
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_warning_days: 1500
+ openshift_certificate_expiry_save_json_results: yes
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml
new file mode 100644
index 000000000..960457c4b
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml
@@ -0,0 +1,12 @@
+---
+# Change the expiration warning window to 1500 days (good for testing
+# the module out):
+
+- name: Check cert expirys
+ hosts: nodes:masters:etcd
+ become: yes
+ gather_facts: no
+ vars:
+ openshift_certificate_expiry_warning_days: 1500
+ roles:
+ - role: openshift_certificate_expiry
diff --git a/playbooks/openshift-checks/certificate_expiry/roles b/playbooks/openshift-checks/certificate_expiry/roles
new file mode 120000
index 000000000..20c4c58cf
--- /dev/null
+++ b/playbooks/openshift-checks/certificate_expiry/roles
@@ -0,0 +1 @@
+../../../roles \ No newline at end of file
diff --git a/playbooks/openshift-checks/health.yml b/playbooks/openshift-checks/health.yml
new file mode 100644
index 000000000..caac06626
--- /dev/null
+++ b/playbooks/openshift-checks/health.yml
@@ -0,0 +1,4 @@
+---
+- import_playbook: ../init/main.yml
+
+- import_playbook: private/health.yml
diff --git a/playbooks/openshift-checks/pre-install.yml b/playbooks/openshift-checks/pre-install.yml
new file mode 100644
index 000000000..4511f6e3c
--- /dev/null
+++ b/playbooks/openshift-checks/pre-install.yml
@@ -0,0 +1,4 @@
+---
+- import_playbook: ../init/main.yml
+
+- import_playbook: private/pre-install.yml
diff --git a/playbooks/openshift-checks/private/adhoc.yml b/playbooks/openshift-checks/private/adhoc.yml
new file mode 100644
index 000000000..d0deaeb65
--- /dev/null
+++ b/playbooks/openshift-checks/private/adhoc.yml
@@ -0,0 +1,13 @@
+---
+- name: OpenShift Health Checks
+ hosts: oo_all_hosts
+
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: adhoc
+ post_tasks:
+ - name: Run health checks (adhoc)
+ action: openshift_health_check
+ args:
+ checks: '{{ openshift_checks | default([]) }}'
diff --git a/playbooks/openshift-checks/private/health.yml b/playbooks/openshift-checks/private/health.yml
new file mode 100644
index 000000000..d0921b9d3
--- /dev/null
+++ b/playbooks/openshift-checks/private/health.yml
@@ -0,0 +1,13 @@
+---
+- name: OpenShift Health Checks
+ hosts: oo_all_hosts
+
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: health
+ post_tasks:
+ - name: Run health checks (@health)
+ action: openshift_health_check
+ args:
+ checks: ['@health']
diff --git a/playbooks/openshift-checks/private/install.yml b/playbooks/openshift-checks/private/install.yml
new file mode 100644
index 000000000..93cf6c359
--- /dev/null
+++ b/playbooks/openshift-checks/private/install.yml
@@ -0,0 +1,51 @@
+---
+- name: Health Check Checkpoint Start
+ hosts: all
+ gather_facts: false
+ tasks:
+ - name: Set Health Check 'In Progress'
+ run_once: true
+ set_stats:
+ data:
+ installer_phase_health:
+ status: "In Progress"
+ start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+
+- name: OpenShift Health Checks
+ hosts: oo_all_hosts
+ any_errors_fatal: true
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: install
+ post_tasks:
+ - name: Run health checks (install) - EL
+ when: ansible_distribution != "Fedora"
+ action: openshift_health_check
+ args:
+ checks:
+ - disk_availability
+ - memory_availability
+ - package_availability
+ - package_version
+ - docker_image_availability
+ - docker_storage
+
+ - name: Run health checks (install) - Fedora
+ when: ansible_distribution == "Fedora"
+ action: openshift_health_check
+ args:
+ checks:
+ - docker_image_availability
+
+- name: Health Check Checkpoint End
+ hosts: all
+ gather_facts: false
+ tasks:
+ - name: Set Health Check 'Complete'
+ run_once: true
+ set_stats:
+ data:
+ installer_phase_health:
+ status: "Complete"
+ end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
diff --git a/playbooks/openshift-checks/private/pre-install.yml b/playbooks/openshift-checks/private/pre-install.yml
new file mode 100644
index 000000000..32449d4e4
--- /dev/null
+++ b/playbooks/openshift-checks/private/pre-install.yml
@@ -0,0 +1,13 @@
+---
+- name: OpenShift Health Checks
+ hosts: oo_all_hosts
+
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: pre-install
+ post_tasks:
+ - name: Run health checks (@preflight)
+ action: openshift_health_check
+ args:
+ checks: ['@preflight']
diff --git a/playbooks/openshift-checks/private/roles b/playbooks/openshift-checks/private/roles
new file mode 120000
index 000000000..20c4c58cf
--- /dev/null
+++ b/playbooks/openshift-checks/private/roles
@@ -0,0 +1 @@
+../../../roles \ No newline at end of file
diff --git a/playbooks/openshift-checks/roles b/playbooks/openshift-checks/roles
new file mode 120000
index 000000000..b741aa3db
--- /dev/null
+++ b/playbooks/openshift-checks/roles
@@ -0,0 +1 @@
+../../roles \ No newline at end of file