diff options
Diffstat (limited to 'playbooks')
42 files changed, 308 insertions, 388 deletions
diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 6119990fe..1c8257162 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -313,7 +313,7 @@ - name: restart docker service: name=docker state=restarted ignore_errors: true - when: "container_engine.state != 'started'" + when: not (container_engine | changed) - name: restart NetworkManager service: name=NetworkManager state=restarted diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md new file mode 100644 index 000000000..4b2ff1f94 --- /dev/null +++ b/playbooks/byo/openshift-checks/README.md @@ -0,0 +1,65 @@ +# OpenShift health checks + +This directory contains Ansible playbooks for detecting potential problems prior +to an install, as well as health checks to run on existing OpenShift clusters. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +Thus, the playbooks run a battery of checks against the inventory hosts and have +Ansible gather intermediate errors, giving a more complete diagnostic of the +state of each host. If any check failed, the playbook run will be marked as +failed. + +To facilitate understanding the problems that were encountered, a custom +callback plugin summarizes execution errors at the end of a playbook run. + +# Available playbooks + +1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system + requirements and looks for common problems that can prevent a successful + installation of a production cluster. + +2. Diagnostic playbook ([health.yml](health.yml)) - checks an existing cluster + for known signs of problems. + +3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - + checks that certificates in use are valid and not expiring soon. 
+ +## Running + +With a [recent installation of Ansible](../../../README.md#setup), run the playbook +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + + ```console + $ git clone https://github.com/openshift/openshift-ansible + $ cd openshift-ansible + ``` + +2. Run the appropriate playbook: + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/pre-install.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/health.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/certificate_expiry/default.yaml -v + ``` + +## Running via Docker image + +This repository is built into a Docker image including Ansible so that it can +be run anywhere Docker is available. Instructions for doing so may be found +[in the README](../../README_CONTAINER_IMAGE.md). + diff --git a/playbooks/certificate_expiry/default.yaml b/playbooks/byo/openshift-checks/certificate_expiry/default.yaml index 630135cae..630135cae 100644 --- a/playbooks/certificate_expiry/default.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/default.yaml diff --git a/playbooks/certificate_expiry/easy-mode-upload.yaml b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml index 378d1f154..378d1f154 100644 --- a/playbooks/certificate_expiry/easy-mode-upload.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml diff --git a/playbooks/certificate_expiry/easy-mode.yaml b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml index ae41c7c14..ae41c7c14 100644 --- a/playbooks/certificate_expiry/easy-mode.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml diff --git a/playbooks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml index d80cb6ff4..d80cb6ff4 
100644 --- a/playbooks/certificate_expiry/html_and_json_default_paths.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml diff --git a/playbooks/certificate_expiry/html_and_json_timestamp.yaml b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml index 2189455b7..2189455b7 100644 --- a/playbooks/certificate_expiry/html_and_json_timestamp.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml diff --git a/playbooks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml index 87a0f3be4..87a0f3be4 100644 --- a/playbooks/certificate_expiry/longer-warning-period-json-results.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml diff --git a/playbooks/certificate_expiry/longer_warning_period.yaml b/playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml index 960457c4b..960457c4b 100644 --- a/playbooks/certificate_expiry/longer_warning_period.yaml +++ b/playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml diff --git a/playbooks/byo/openshift-checks/certificate_expiry/roles b/playbooks/byo/openshift-checks/certificate_expiry/roles new file mode 120000 index 000000000..4bdbcbad3 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/roles @@ -0,0 +1 @@ +../../../../roles
\ No newline at end of file diff --git a/playbooks/byo/openshift-checks/health.yml b/playbooks/byo/openshift-checks/health.yml new file mode 100644 index 000000000..dfc1a7db0 --- /dev/null +++ b/playbooks/byo/openshift-checks/health.yml @@ -0,0 +1,3 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml +- include: ../../common/openshift-checks/health.yml diff --git a/playbooks/byo/openshift-checks/pre-install.yml b/playbooks/byo/openshift-checks/pre-install.yml new file mode 100644 index 000000000..5e8c3ab9b --- /dev/null +++ b/playbooks/byo/openshift-checks/pre-install.yml @@ -0,0 +1,3 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml +- include: ../../common/openshift-checks/pre-install.yml diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index acf5469bf..fd4a9eb26 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -3,6 +3,19 @@ tags: - always +- name: Verify Requirements + hosts: OSEv3 + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "install" + post_tasks: + - action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability + - include: ../../common/openshift-cluster/std_include.yml tags: - always diff --git a/playbooks/byo/openshift-preflight/README.md b/playbooks/byo/openshift-preflight/README.md deleted file mode 100644 index b50292eac..000000000 --- a/playbooks/byo/openshift-preflight/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# OpenShift preflight checks - -Here we provide an Ansible playbook for detecting potential roadblocks prior to -an install or upgrade. - -Ansible's default operation mode is to fail fast, on the first error. However, -when performing checks, it is useful to gather as much information about -problems as possible in a single run. 
- -The `check.yml` playbook runs a battery of checks against the inventory hosts -and tells Ansible to ignore intermediate errors, thus giving a more complete -diagnostic of the state of each host. Still, if any check failed, the playbook -run will be marked as having failed. - -To facilitate understanding the problems that were encountered, we provide a -custom callback plugin to summarize execution errors at the end of a playbook -run. - ---- - -*Note that currently the `check.yml` playbook is only useful for RPM-based -installations. Containerized installs are excluded from checks for now, but -might be included in the future if there is demand for that.* - ---- - -## Running - -With an installation of Ansible 2.2 or greater, run the playbook directly -against your inventory file. Here is the step-by-step: - -1. If you haven't done it yet, clone this repository: - - ```console - $ git clone https://github.com/openshift/openshift-ansible - $ cd openshift-ansible - ``` - -2. Run the playbook: - - ```console - $ ansible-playbook -i <inventory file> playbooks/byo/openshift-preflight/check.yml - ``` diff --git a/playbooks/byo/openshift-preflight/check.yml b/playbooks/byo/openshift-preflight/check.yml index eb763221f..2e53452a6 100644 --- a/playbooks/byo/openshift-preflight/check.yml +++ b/playbooks/byo/openshift-preflight/check.yml @@ -1,15 +1,3 @@ --- -- include: ../openshift-cluster/initialize_groups.yml - -- name: Run OpenShift health checks - # Temporarily reverting to OSEv3 until group standardization is complete - hosts: OSEv3 - roles: - - openshift_health_checker - post_tasks: - # NOTE: we need to use the old "action: name" syntax until - # https://github.com/ansible/ansible/issues/20513 is fixed. 
- - action: openshift_health_check - args: - checks: - - '@preflight' +# location is moved; this file remains so existing instructions keep working +- include: ../openshift-checks/pre-install.yml diff --git a/playbooks/certificate_expiry b/playbooks/certificate_expiry new file mode 120000 index 000000000..9cf5334a1 --- /dev/null +++ b/playbooks/certificate_expiry @@ -0,0 +1 @@ +byo/openshift-checks/certificate_expiry/
\ No newline at end of file diff --git a/playbooks/certificate_expiry/roles b/playbooks/certificate_expiry/roles deleted file mode 120000 index b741aa3db..000000000 --- a/playbooks/certificate_expiry/roles +++ /dev/null @@ -1 +0,0 @@ -../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml new file mode 100644 index 000000000..1bee460e8 --- /dev/null +++ b/playbooks/common/openshift-checks/health.yml @@ -0,0 +1,11 @@ +--- +- name: Run OpenShift health checks + hosts: OSEv3 + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "health" + post_tasks: + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@health'] diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml new file mode 100644 index 000000000..e01c6f38d --- /dev/null +++ b/playbooks/common/openshift-checks/pre-install.yml @@ -0,0 +1,11 @@ +--- +- hosts: OSEv3 + name: run OpenShift pre-install checks + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: "pre-install" + post_tasks: + - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513 + args: + checks: ['@preflight'] diff --git a/playbooks/byo/openshift-preflight/roles b/playbooks/common/openshift-checks/roles index 20c4c58cf..20c4c58cf 120000 --- a/playbooks/byo/openshift-preflight/roles +++ b/playbooks/common/openshift-checks/roles diff --git a/playbooks/common/openshift-cluster/config.yml b/playbooks/common/openshift-cluster/config.yml index c320b80ed..1482b3a3f 100644 --- a/playbooks/common/openshift-cluster/config.yml +++ b/playbooks/common/openshift-cluster/config.yml @@ -8,10 +8,10 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: disable + roles: + - role: openshift_excluder + r_openshift_excluder_action: disable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" - include: ../openshift-etcd/config.yml tags: @@ -50,7 +50,7 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder 
- tasks_from: enable + roles: + - role: openshift_excluder + r_openshift_excluder_action: enable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml index 4fa7f9cdf..0d0ff798c 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/ca.yml @@ -9,7 +9,8 @@ - name: Backup existing etcd CA certificate directories hosts: oo_etcd_to_config roles: - - etcd_common + - role: etcd_common + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" tasks: - name: Determine if CA certificate directory exists stat: @@ -52,7 +53,8 @@ vars: etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" roles: - - etcd_common + - role: etcd_common + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" tasks: - name: Create a tarball of the etcd ca certs command: > @@ -98,7 +100,8 @@ - name: Retrieve etcd CA certificate hosts: oo_first_etcd roles: - - etcd_common + - role: etcd_common + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" tasks: - name: Retrieve etcd CA certificate fetch: diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml b/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml index 2963a5940..6b5c805e6 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates/etcd.yml @@ -3,7 +3,8 @@ hosts: oo_first_etcd any_errors_fatal: true roles: - - etcd_common + - role: etcd_common + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" post_tasks: - name: Determine if generated etcd certificates exist stat: @@ -27,7 +28,8 @@ hosts: oo_etcd_to_config any_errors_fatal: true roles: - - etcd_common + - role: etcd_common + r_etcd_common_etcd_runtime: "{{ 
openshift.common.etcd_runtime }}" post_tasks: - name: Backup etcd certificates command: > @@ -50,6 +52,7 @@ etcd_peers: "{{ groups.oo_etcd_to_config | default([], true) }}" etcd_certificates_etcd_hosts: "{{ groups.oo_etcd_to_config | default([], true) }}" openshift_ca_host: "{{ groups.oo_first_master.0 }}" + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - name: Redeploy etcd client certificates for masters hosts: oo_masters_to_config @@ -63,4 +66,5 @@ etcd_cert_prefix: "master.etcd-" openshift_ca_host: "{{ groups.oo_first_master.0 }}" openshift_master_count: "{{ openshift.master.master_count | default(groups.oo_masters | length) }}" + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config diff --git a/playbooks/common/openshift-cluster/upgrades/disable_master_excluders.yml b/playbooks/common/openshift-cluster/upgrades/disable_master_excluders.yml index 35da3b6c3..800621857 100644 --- a/playbooks/common/openshift-cluster/upgrades/disable_master_excluders.yml +++ b/playbooks/common/openshift-cluster/upgrades/disable_master_excluders.yml @@ -2,13 +2,11 @@ - name: Disable excluders hosts: oo_masters_to_config gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: verify_upgrade - - include_role: - name: openshift_excluder - tasks_from: disable - vars: - openshift_excluder_package_state: latest - docker_excluder_package_state: latest + roles: + - role: openshift_excluder + r_openshift_excluder_action: disable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" + r_openshift_excluder_verify_upgrade: true + r_openshift_excluder_upgrade_target: "{{ openshift_upgrade_target }}" + r_openshift_excluder_package_state: latest + r_openshift_excluder_docker_package_state: latest diff --git a/playbooks/common/openshift-cluster/upgrades/disable_node_excluders.yml 
b/playbooks/common/openshift-cluster/upgrades/disable_node_excluders.yml index 847c22085..7988e97ab 100644 --- a/playbooks/common/openshift-cluster/upgrades/disable_node_excluders.yml +++ b/playbooks/common/openshift-cluster/upgrades/disable_node_excluders.yml @@ -2,13 +2,11 @@ - name: Disable excluders hosts: oo_nodes_to_config gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: verify_upgrade - - include_role: - name: openshift_excluder - tasks_from: disable - vars: - openshift_excluder_package_state: latest - docker_excluder_package_state: latest + roles: + - role: openshift_excluder + r_openshift_excluder_action: disable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" + r_openshift_excluder_verify_upgrade: true + r_openshift_excluder_upgrade_target: "{{ openshift_upgrade_target }}" + r_openshift_excluder_package_state: latest + r_openshift_excluder_docker_package_state: latest diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index 9d0333ca8..b7fd2c0c5 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -1,84 +1,14 @@ --- - name: Backup etcd hosts: oo_etcd_hosts_to_backup - vars: - embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - etcdctl_command: "{{ 'etcdctl' if not openshift.common.is_containerized or embedded_etcd else 'docker exec etcd_container etcdctl' if not openshift.common.is_etcd_system_container else 'runc exec etcd etcdctl' }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" roles: - - openshift_facts - tasks: - # Ensure we persist the etcd role for this host in openshift_facts - - openshift_facts: - role: etcd - local_facts: {} - when: "'etcd' not in openshift" - - set_fact: - etcd_backup_dir: "{{ openshift.etcd.etcd_data_dir }}/openshift-backup-{{ backup_tag | default('') }}{{ 
timestamp }}" - - # TODO: replace shell module with command and update later checks - - name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 - register: avail_disk - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - - # TODO: replace shell module with command and update later checks - - name: Check current etcd disk usage - shell: du --exclude='*openshift-backup*' -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: embedded_etcd | bool - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - - - name: Abort if insufficient disk space for etcd backup - fail: - msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) - - # For non containerized and non embedded we should have the correct version of - # etcd installed already. So don't do anything. - # - # For containerized installs we now exec into etcd_container - # - # For embedded non containerized we need to ensure we have the latest version - # etcd on the host. 
- - name: Install latest etcd for embedded - package: - name: etcd - state: latest - when: - - embedded_etcd | bool - - not openshift.common.is_atomic | bool - - - name: Generate etcd backup - command: > - {{ etcdctl_command }} backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ etcd_backup_dir }} - - # According to the docs change you can simply copy snap/db - # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6 - - name: Check for v3 data store - stat: - path: "{{ openshift.etcd.etcd_data_dir }}/member/snap/db" - register: v3_db - - - name: Copy etcd v3 data store - command: > - cp -a {{ openshift.etcd.etcd_data_dir }}/member/snap/db - {{ etcd_backup_dir }}/member/snap/ - when: v3_db.stat.exists - - - set_fact: - etcd_backup_complete: True - - - name: Display location of etcd backup - debug: - msg: "Etcd backup created in {{ etcd_backup_dir }}" + - role: openshift_facts + - role: etcd_upgrade + r_etcd_upgrade_action: backup + r_etcd_backup_tag: etcd_backup_tag + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" + r_etcd_upgrade_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + r_etcd_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - name: Gate on etcd backup hosts: localhost @@ -88,7 +18,7 @@ - set_fact: etcd_backup_completed: "{{ hostvars | oo_select_keys(groups.oo_etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + | oo_collect('inventory_hostname', {'r_etcd_upgrade_backup_complete': true}) }}" - set_fact: etcd_backup_failed: "{{ groups.oo_etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - fail: diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml deleted file mode 100644 index 5f8b59e17..000000000 --- a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml +++ 
/dev/null @@ -1,46 +0,0 @@ ---- -- name: Verify cluster is healthy pre-upgrade - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - -- name: Get current image - shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}' - register: current_image - -- name: Set new_etcd_image - set_fact: - new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd:' ~ upgrade_version ) }}" - -- name: Pull new etcd image - command: "docker pull {{ new_etcd_image }}" - -- name: Update to latest etcd image - replace: - dest: /etc/systemd/system/etcd_container.service - regexp: "{{ current_image.stdout }}$" - replace: "{{ new_etcd_image }}" - -- name: Restart etcd_container - systemd: - name: etcd_container - daemon_reload: yes - state: restarted - -## TODO: probably should just move this into the backup playbooks, also this -## will fail on atomic host. We need to revisit how to do etcd backups there as -## the container may be newer than etcdctl on the host. 
Assumes etcd3 obsoletes etcd (7.3.1) -- name: Upgrade etcd for etcdctl when not atomic - package: name=etcd state=latest - when: not openshift.common.is_atomic | bool - -- name: Verify cluster is healthy - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - register: etcdctl - until: etcdctl.rc == 0 - retries: 3 - delay: 10 - -- name: Store new etcd_image - openshift_facts: - role: etcd - local_facts: - etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml deleted file mode 100644 index 30232110e..000000000 --- a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -# F23 GA'd with etcd 2.0, currently has 2.2 in updates -# F24 GA'd with etcd-2.2, currently has 2.2 in updates -# F25 Beta currently has etcd 3.0 -- name: Verify cluster is healthy pre-upgrade - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - -- name: Update etcd - package: - name: "etcd" - state: "latest" - -- name: Restart etcd - service: - name: etcd - state: restarted - -- name: Verify cluster is healthy - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - register: etcdctl - until: etcdctl.rc == 0 - retries: 3 - delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml index d9b59edcb..3e01883ae 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -8,7 +8,7 @@ - name: Backup etcd before upgrading 
anything include: backup.yml vars: - backup_tag: "pre-upgrade-" + etcd_backup_tag: "pre-upgrade-" when: openshift_etcd_backup | default(true) | bool - name: Drop etcdctl profiles diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml deleted file mode 100644 index 3a972e8ab..000000000 --- a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml +++ /dev/null @@ -1,20 +0,0 @@ ---- -- name: Verify cluster is healthy pre-upgrade - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - -- name: Update etcd RPM - package: - name: etcd-{{ upgrade_version }}* - state: latest - -- name: Restart etcd - service: - name: etcd - state: restarted - -- name: Verify cluster is healthy - command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" - register: etcdctl - until: etcdctl.rc == 0 - retries: 3 - delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml index 45e301315..0431c1ce0 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml @@ -2,118 +2,109 @@ - name: Determine etcd version hosts: oo_etcd_hosts_to_upgrade tasks: - - name: Record RPM based etcd version - command: rpm -qa --qf '%{version}' etcd\* - args: - warn: no - register: etcd_rpm_version - failed_when: false - when: not openshift.common.is_containerized | bool - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - - - name: Record containerized etcd version - command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* - register: 
etcd_container_version - failed_when: false - when: openshift.common.is_containerized | bool - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - - - name: Record containerized etcd version - command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* - register: etcd_container_version - failed_when: false - when: openshift.common.is_containerized | bool and not openshift.common.is_etcd_system_container | bool - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - - - name: Record containerized etcd version - command: runc exec etcd_container rpm -qa --qf '%{version}' etcd\* - register: etcd_container_version - failed_when: false - when: openshift.common.is_containerized | bool and openshift.common.is_etcd_system_container | bool - # AUDIT:changed_when: `false` because we are only inspecting - # state, not manipulating anything - changed_when: false - -# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop -# through hosts, then loop through tasks only when appropriate -- name: Upgrade to 2.1 - hosts: oo_etcd_hosts_to_upgrade - serial: 1 + - block: + - name: Record RPM based etcd version + command: rpm -qa --qf '%{version}' etcd\* + args: + warn: no + register: etcd_rpm_version + failed_when: false + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + - debug: + msg: "Etcd rpm version {{ etcd_rpm_version.stdout }} detected" + when: + - not openshift.common.is_containerized | bool + + - block: + - name: Record containerized etcd version (docker) + command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* + register: etcd_container_version_docker + failed_when: false + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + when: + - not 
openshift.common.is_etcd_system_container | bool + + # Given a registered variable is set even if the when condition + # is false, we need to set etcd_container_version separately + - set_fact: + etcd_container_version: "{{ etcd_container_version_docker.stdout }}" + when: + - not openshift.common.is_etcd_system_container | bool + + - name: Record containerized etcd version (runc) + command: runc exec etcd_container rpm -qa --qf '%{version}' etcd\* + register: etcd_container_version_runc + failed_when: false + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + when: + - openshift.common.is_etcd_system_container | bool + + # Given a registered variable is set even if the when condition + # is false, we need to set etcd_container_version separately + - set_fact: + etcd_container_version: "{{ etcd_container_version_runc.stdout }}" + when: + - openshift.common.is_etcd_system_container | bool + + - debug: + msg: "Etcd containerized version {{ etcd_container_version }} detected" + when: + - openshift.common.is_containerized | bool + +- include: upgrade_rpm_members.yml vars: - upgrade_version: '2.1' - tasks: - - include: rhel_tasks.yml - 
when: etcd_container_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool + etcd_upgrade_version: '2.2.5' -- name: Upgrade RPM hosts to 2.3 - hosts: oo_etcd_hosts_to_upgrade - serial: 1 +- include: upgrade_rpm_members.yml vars: - upgrade_version: '2.3' - tasks: - - include: rhel_tasks.yml - when: etcd_rpm_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + etcd_upgrade_version: '2.3' -- name: Upgrade containerized hosts to 2.3.7 - hosts: oo_etcd_hosts_to_upgrade - serial: 1 +- include: upgrade_image_members.yml vars: - upgrade_version: 2.3.7 - tasks: - - include: containerized_tasks.yml - when: etcd_container_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool + etcd_upgrade_version: '2.3.7' -- name: Upgrade RPM hosts to 3.0 - hosts: oo_etcd_hosts_to_upgrade - serial: 1 +- include: upgrade_rpm_members.yml vars: - upgrade_version: '3.0' - tasks: - - include: rhel_tasks.yml - when: etcd_rpm_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + etcd_upgrade_version: '3.0' -- name: Upgrade containerized hosts to etcd3 image - hosts: oo_etcd_hosts_to_upgrade - serial: 1 +- include: upgrade_image_members.yml vars: - upgrade_version: 3.0.15 - tasks: - - include: containerized_tasks.yml - when: etcd_container_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool + etcd_upgrade_version: '3.0.15' + +- include: upgrade_rpm_members.yml + vars: + etcd_upgrade_version: '3.1' + +- include: upgrade_image_members.yml + vars: + etcd_upgrade_version: '3.1.3' - name: Upgrade fedora to latest hosts: oo_etcd_hosts_to_upgrade serial: 1 tasks: - - include: fedora_tasks.yml - when: ansible_distribution == 'Fedora' and not 
openshift.common.is_containerized | bool + - include_role: + name: etcd_upgrade + when: + - ansible_distribution == 'Fedora' + - not openshift.common.is_containerized | bool - name: Backup etcd include: backup.yml vars: - backup_tag: "post-3.0-" + etcd_backup_tag: "post-3.0-" when: openshift_etcd_backup | default(true) | bool diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml new file mode 100644 index 000000000..831ca8f57 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_image_members.yml @@ -0,0 +1,17 @@ +--- +# INPUT etcd_upgrade_version +# INPUT etcd_container_version +# INPUT openshift.common.is_containerized +- name: Upgrade containerized hosts to {{ etcd_upgrade_version }} + hosts: oo_etcd_hosts_to_upgrade + serial: 1 + roles: + - role: etcd_upgrade + r_etcd_upgrade_action: upgrade + r_etcd_upgrade_mechanism: image + r_etcd_upgrade_version: "{{ etcd_upgrade_version }}" + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" + etcd_peer: "{{ openshift.common.hostname }}" + when: + - etcd_container_version | default('99') | version_compare(etcd_upgrade_version,'<') + - openshift.common.is_containerized | bool diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml new file mode 100644 index 000000000..2e79451e0 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade_rpm_members.yml @@ -0,0 +1,18 @@ +--- +# INPUT etcd_upgrade_version +# INPUT etcd_rpm_version +# INPUT openshift.common.is_containerized +- name: Upgrade to {{ etcd_upgrade_version }} + hosts: oo_etcd_hosts_to_upgrade + serial: 1 + roles: + - role: etcd_upgrade + r_etcd_upgrade_action: upgrade + r_etcd_upgrade_mechanism: rpm + r_etcd_upgrade_version: "{{ etcd_upgrade_version }}" + r_etcd_common_etcd_runtime: "host" + 
etcd_peer: "{{ openshift.common.hostname }}" + when: + - etcd_rpm_version.stdout | default('99') | version_compare(etcd_upgrade_version, '<') + - ansible_distribution == 'RedHat' + - not openshift.common.is_containerized | bool diff --git a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml index 8a60ef236..4cf434dab 100644 --- a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml @@ -10,7 +10,7 @@ router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', openshift_image_tag ) }}" registry_console_image: "{{ openshift.master.registry_url | replace ( '${component}', 'registry-console') | - replace ( '${version}', openshift.common.short_version ) }}" + replace ( '${version}', 'v' ~ openshift.common.short_version ) }}" pre_tasks: - name: Load lib_openshift modules @@ -124,7 +124,7 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: enable + roles: + - role: openshift_excluder + r_openshift_excluder_action: enable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 0ad934d2d..e10c4c540 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -107,8 +107,8 @@ yedit: src: "{{ openshift.common.config_base }}/master/master-config.yaml" key: 'imageConfig.format' - value: "{{ oreg_url }}" - when: oreg_url is defined + value: "{{ oreg_url | default(oreg_url_master) }}" + when: oreg_url is defined or oreg_url_master is defined # Run the upgrade hook prior to restarting services/system if defined: - debug: 
msg="Running master upgrade hook {{ openshift_master_upgrade_hook }}" diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 2a5ac0aef..4d455fe0a 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -34,6 +34,9 @@ - openshift_facts - docker - openshift_node_upgrade + - role: openshift_excluder + r_openshift_excluder_action: enable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" post_tasks: - name: Set node schedulability @@ -46,13 +49,3 @@ register: node_schedulable until: node_schedulable|succeeded when: node_unschedulable|changed - -- name: Re-enable excluder if it was previously enabled - hosts: oo_nodes_to_config - tags: - - always - gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: enable diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/validator.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/validator.yml index ac5704f69..78c1767b8 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/validator.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/validator.yml @@ -7,4 +7,6 @@ hosts: oo_first_master roles: - { role: lib_openshift } - tasks: [] + tasks: + - name: Check for invalid namespaces and SDN errors + oc_objectvalidator: diff --git a/playbooks/common/openshift-cluster/validate_hostnames.yml b/playbooks/common/openshift-cluster/validate_hostnames.yml index 48cc03b19..33fc5630f 100644 --- a/playbooks/common/openshift-cluster/validate_hostnames.yml +++ b/playbooks/common/openshift-cluster/validate_hostnames.yml @@ -13,4 +13,6 @@ pause: prompt: "The hostname \"{{ openshift.common.hostname }}\" for \"{{ ansible_nodename }}\" doesn't resolve to an ip address owned by this host. 
Please set openshift_hostname variable to a hostname that when resolved on the host in question resolves to an IP address matching an interface on this host. This host will fail liveness checks for pods utilizing hostPorts, press ENTER to continue or CTRL-C to abort." seconds: "{{ 10 if openshift_override_hostname_check | default(false) | bool else omit }}" - when: lookupip.stdout not in ansible_all_ipv4_addresses + when: + - lookupip.stdout != '127.0.0.1' + - lookupip.stdout not in ansible_all_ipv4_addresses diff --git a/playbooks/common/openshift-etcd/config.yml b/playbooks/common/openshift-etcd/config.yml index 1b8106e0e..2cb6197d1 100644 --- a/playbooks/common/openshift-etcd/config.yml +++ b/playbooks/common/openshift-etcd/config.yml @@ -7,4 +7,5 @@ etcd_peers: "{{ groups.oo_etcd_to_config | default([], true) }}" etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" etcd_certificates_etcd_hosts: "{{ groups.oo_etcd_to_config | default([], true) }}" + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - role: nickhammond.logrotate diff --git a/playbooks/common/openshift-master/scaleup.yml b/playbooks/common/openshift-master/scaleup.yml index 49594d294..bc61ee9bb 100644 --- a/playbooks/common/openshift-master/scaleup.yml +++ b/playbooks/common/openshift-master/scaleup.yml @@ -65,10 +65,10 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: disable + roles: + - role: openshift_excluder + r_openshift_excluder_action: disable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" - include: ../openshift-master/config.yml @@ -81,7 +81,7 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: enable + roles: + - role: openshift_excluder + r_openshift_excluder_action: enable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" diff --git a/playbooks/common/openshift-node/restart.yml 
b/playbooks/common/openshift-node/restart.yml index 441b100e9..01cf948e0 100644 --- a/playbooks/common/openshift-node/restart.yml +++ b/playbooks/common/openshift-node/restart.yml @@ -51,7 +51,7 @@ register: node_output delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_config - until: node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True + until: node_output.results.returncode == 0 and node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True # Give the node two minutes to come back online. retries: 24 delay: 5 diff --git a/playbooks/common/openshift-node/scaleup.yml b/playbooks/common/openshift-node/scaleup.yml index d94df553c..40da8990d 100644 --- a/playbooks/common/openshift-node/scaleup.yml +++ b/playbooks/common/openshift-node/scaleup.yml @@ -32,10 +32,10 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: disable + roles: + - role: openshift_excluder + r_openshift_excluder_action: disable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" - include: ../openshift-node/config.yml @@ -44,7 +44,7 @@ tags: - always gather_facts: no - tasks: - - include_role: - name: openshift_excluder - tasks_from: enable + roles: + - role: openshift_excluder + r_openshift_excluder_action: enable + r_openshift_excluder_service_type: "{{ openshift.common.service_type }}" |