From 75354f645ffca8c30c4bc8ac57112c8df65398dd Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 31 Oct 2016 10:23:10 -0400 Subject: Uninstall etcd3 package --- playbooks/adhoc/uninstall.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 789f66b14..43a4ef169 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -338,6 +338,7 @@ failed_when: False with_items: - etcd + - etcd3 - firewalld - name: Stop additional atomic services @@ -352,6 +353,7 @@ when: not is_atomic | bool with_items: - etcd + - etcd3 - shell: systemctl reset-failed changed_when: False -- cgit v1.2.3 From 98e4d9600bb193c772c3f03ac3b810798734c168 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 31 Oct 2016 10:24:15 -0400 Subject: Move backups to a separate file for re-use --- .../openshift-cluster/upgrades/etcd_backup.yml | 73 +++++++++++++++++++++ .../upgrades/upgrade_control_plane.yml | 74 +--------------------- 2 files changed, 74 insertions(+), 73 deletions(-) create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd_backup.yml diff --git a/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml new file mode 100644 index 000000000..57b156b1c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml @@ -0,0 +1,73 @@ +- name: Backup etcd + hosts: etcd_hosts_to_backup + vars: + embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + roles: + - openshift_facts + tasks: + # Ensure we persist the etcd role for this host in openshift_facts + - openshift_facts: + role: etcd + local_facts: {} + when: "'etcd' not in openshift" + + - stat: path=/var/lib/openshift + register: var_lib_openshift + + - stat: path=/var/lib/origin + register: var_lib_origin + + - name: Create origin symlink if necessary + file: src=/var/lib/openshift/ dest=/var/lib/origin state=link + when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + + # TODO: replace shell module with command and update later checks + # We assume to be using the data dir for all backups. + - name: Check available disk space for etcd backup + shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + register: avail_disk + + # TODO: replace shell module with command and update later checks + - name: Check current embedded etcd disk usage + shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: embedded_etcd | bool + + - name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + + - name: Install etcd (for etcdctl) + action: "{{ ansible_pkg_mgr }} name=etcd state=present" + when: not openshift.common.is_atomic | bool + + - name: Generate etcd backup + command: > + etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} + --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + + - set_fact: + etcd_backup_complete: True + + - name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.etcd_hosts_to_backup) + | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" + - fail: + msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 764563d28..e1342f76b 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -27,79 +27,7 @@ embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - name: Backup etcd - hosts: etcd_hosts_to_backup - vars: - embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - roles: - - openshift_facts - tasks: - # Ensure we persist the etcd role for this host in openshift_facts - - openshift_facts: - role: etcd - local_facts: {} - when: "'etcd' not in openshift" - - - stat: path=/var/lib/openshift - register: var_lib_openshift - - - stat: path=/var/lib/origin - register: var_lib_origin - - - name: Create origin symlink if necessary - file: src=/var/lib/openshift/ dest=/var/lib/origin state=link - when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False - - # TODO: replace shell module with command and update later checks - # We assume to be using the data dir for all backups. - - name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 - register: avail_disk - - # TODO: replace shell module with command and update later checks - - name: Check current embedded etcd disk usage - shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: embedded_etcd | bool - - - name: Abort if insufficient disk space for etcd backup - fail: - msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) - - - name: Install etcd (for etcdctl) - action: "{{ ansible_pkg_mgr }} name=etcd state=installed" - when: not openshift.common.is_atomic | bool - - - name: Generate etcd backup - command: > - etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }} - - - set_fact: - etcd_backup_complete: True - - - name: Display location of etcd backup - debug: - msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}" - - -- name: Gate on etcd backup - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - etcd_backup_completed: "{{ hostvars - | oo_select_keys(groups.etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" - - set_fact: - etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" - when: etcd_backup_failed | length > 0 + include: etcd_backup.yml - name: Upgrade master packages hosts: oo_masters_to_config -- cgit v1.2.3 From 00d007565aff754cb282631f89de93c1c9cc0a46 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 31 Oct 2016 10:26:08 -0400 Subject: Drop /etc/profile.d/etcdctl.sh Includes bash functions for etcdctl2 and etcdctl3 which provide reasonable defaults for etcdctl functions on a host that's configured with openshift_etcd. --- playbooks/adhoc/uninstall.yml | 1 + roles/etcd/files/etcdctl.sh | 11 +++++++++++ roles/etcd/tasks/etcdctl.yml | 11 +++++++++++ roles/etcd/tasks/main.yml | 3 +++ 4 files changed, 26 insertions(+) create mode 100644 roles/etcd/files/etcdctl.sh create mode 100644 roles/etcd/tasks/etcdctl.yml diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 43a4ef169..4ea639cbe 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -367,6 +367,7 @@ - /etc/ansible/facts.d/openshift.fact - /etc/etcd - /etc/systemd/system/etcd_container.service + - /etc/profile.d/etcdctl.sh # Intenationally using rm command over file module because if someone had mounted a filesystem # at /var/lib/etcd then the contents was not removed correctly diff --git a/roles/etcd/files/etcdctl.sh b/roles/etcd/files/etcdctl.sh new file mode 100644 index 000000000..0e324a8a9 --- /dev/null +++ b/roles/etcd/files/etcdctl.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Sets up handy aliases for etcd, need etcdctl2 and etcdctl3 because +# command flags are different between the two. Should work on stand +# alone etcd hosts and master + etcd hosts too because we use the peer keys. +etcdctl2() { + /usr/bin/etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://`hostname`:2379 ${@} +} + +etcdctl3() { + ETCDCTL_API=3 /usr/bin/etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints https://`hostname`:2379 ${@} +} diff --git a/roles/etcd/tasks/etcdctl.yml b/roles/etcd/tasks/etcdctl.yml new file mode 100644 index 000000000..32c176449 --- /dev/null +++ b/roles/etcd/tasks/etcdctl.yml @@ -0,0 +1,11 @@ +- name: Install etcd for etcdctl + action: "{{ ansible_pkg_mgr }} name=etcd state=present" + when: not openshift.common.is_atomic | bool + +- name: Configure etcd profile.d alises + copy: + src: etcdctl.sh + dest: /etc/profile.d/etcdctl.sh + mode: 0755 + owner: root + group: root diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 2bc6a8678..790eb3c5a 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -74,5 +74,8 @@ enabled: yes register: start_result +- include: etcdctl.yml + when: openshift_etcd_etcdctl_profile | default(true) | bool + - set_fact: etcd_service_status_changed: "{{ start_result | changed }}" -- cgit v1.2.3 From a5f4561d8cdfd974705105275f718cee9bb249ad Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 31 Oct 2016 10:30:18 -0400 Subject: Add etcd upgrade for RHEL and Fedora On Fedora we just blindly upgrade to the latest. On RHEL we do stepwise upgrades 2.0,2.1,2.2,2.3,3.0 --- .../openshift-cluster/upgrades/upgrade_etcd.yml | 26 +++++++ .../openshift-cluster/upgrades/etcd/backup.yml | 73 ++++++++++++++++++ .../upgrades/etcd/fedora_tasks.yml | 23 ++++++ .../openshift-cluster/upgrades/etcd/filter_plugins | 1 + .../openshift-cluster/upgrades/etcd/lookup_plugins | 1 + .../openshift-cluster/upgrades/etcd/main.yml | 88 ++++++++++++++++++++++ .../openshift-cluster/upgrades/etcd/rhel_tasks.yml | 23 ++++++ .../common/openshift-cluster/upgrades/etcd/roles | 1 + .../openshift-cluster/upgrades/etcd_backup.yml | 73 ------------------ .../upgrades/upgrade_control_plane.yml | 2 +- 10 files changed, 237 insertions(+), 74 deletions(-) create mode 100644 playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd/backup.yml create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml create mode 120000 playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins create mode 120000 playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd/main.yml create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml create mode 120000 playbooks/common/openshift-cluster/upgrades/etcd/roles delete mode 100644 playbooks/common/openshift-cluster/upgrades/etcd_backup.yml diff --git a/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml new file mode 100644 index 000000000..c25f96212 --- /dev/null +++ b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml @@ -0,0 +1,26 @@ +--- +- include: ../../../common/openshift-cluster/verify_ansible_version.yml + +- name: Create initial host groups for localhost + hosts: localhost + connection: local + become: no + gather_facts: no + tags: + - always + tasks: + - include_vars: ../cluster_hosts.yml + - add_host: + name: "{{ item }}" + groups: l_oo_all_hosts + with_items: "{{ g_all_hosts | default([]) }}" + +- name: Create initial host groups for all hosts + hosts: l_oo_all_hosts + gather_facts: no + tags: + - always + tasks: + - include_vars: ../cluster_hosts.yml + +- include: ../../../common/openshift-cluster/upgrades/etcd/main.yml diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml new file mode 100644 index 000000000..57b156b1c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -0,0 +1,73 @@ +- name: Backup etcd + hosts: etcd_hosts_to_backup + vars: + embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + roles: + - openshift_facts + tasks: + # Ensure we persist the etcd role for this host in openshift_facts + - openshift_facts: + role: etcd + local_facts: {} + when: "'etcd' not in openshift" + + - stat: path=/var/lib/openshift + register: var_lib_openshift + + - stat: path=/var/lib/origin + register: var_lib_origin + + - name: Create origin symlink if necessary + file: src=/var/lib/openshift/ dest=/var/lib/origin state=link + when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + + # TODO: replace shell module with command and update later checks + # We assume to be using the data dir for all backups. + - name: Check available disk space for etcd backup + shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + register: avail_disk + + # TODO: replace shell module with command and update later checks + - name: Check current embedded etcd disk usage + shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: embedded_etcd | bool + + - name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + + - name: Install etcd (for etcdctl) + action: "{{ ansible_pkg_mgr }} name=etcd state=present" + when: not openshift.common.is_atomic | bool + + - name: Generate etcd backup + command: > + etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} + --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + + - set_fact: + etcd_backup_complete: True + + - name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.etcd_hosts_to_backup) + | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" + - fail: + msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml new file mode 100644 index 000000000..30232110e --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml @@ -0,0 +1,23 @@ +--- +# F23 GA'd with etcd 2.0, currently has 2.2 in updates +# F24 GA'd with etcd-2.2, currently has 2.2 in updates +# F25 Beta currently has etcd 3.0 +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd + package: + name: "etcd" + state: "latest" + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins new file mode 120000 index 000000000..27ddaa18b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins @@ -0,0 +1 @@ +../../../../../filter_plugins \ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins new file mode 120000 index 000000000..cf407f69b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins @@ -0,0 +1 @@ +../../../../../lookup_plugins \ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml new file mode 100644 index 000000000..e1c092dbe --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -0,0 +1,88 @@ +--- +# For 1.4/3.4 we want to upgrade everyone to etcd-3.0. etcd docs say to +# upgrade from 2.0.x to 2.1.x to 2.2.x to 2.3.x to 3.0.x. While this is a tedius +# task for RHEL and CENTOS it's simply not possible in Fedora unless you've +# mirrored packages on your own because only the GA and latest versions are +# available in the repos. So for Fedora we'll simply skip this, sorry. + +- include: ../../evaluate_groups.yml + tags: + - always + +- name: Evaluate additional groups for upgrade + hosts: localhost + connection: local + become: no + tasks: + - name: Evaluate etcd_hosts_to_upgrade + add_host: + name: "{{ item }}" + groups: etcd_hosts_to_upgrade, etcd_hosts_to_backup + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}" + +- name: Backup etcd before upgrading anything + include: backup.yml + vars: + backup_tag: "pre-upgrade-" + +- name: Drop etcdctl profiles + hosts: etcd_hosts_to_upgrade + tasks: + - include: roles/etcd/tasks/etcdctl.yml + +- name: Determine etcd version + hosts: etcd_hosts_to_upgrade + tasks: + - name: Record etcd version + command: rpm -q --qf '%{version}' etcd + register: etcd_installed_version + +# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop +# through hosts, then loop through tasks only when appropriate +- name: Upgrade to 2.1 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.1' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | version_compare('2.1','<') and ansible_distribution == 'RedHat' + +- name: Upgrade to 2.2 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.2' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | version_compare('2.2','<') and ansible_distribution == 'RedHat' + +- name: Upgrade to 2.3 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.3' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | version_compare('2.3','<') and ansible_distribution == 'RedHat' + +- name: Upgrade to 3.0 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '3.0' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | version_compare('3.0','<') and ansible_distribution == 'RedHat' + +- name: Upgrade fedora to latest + hosts: etcd_hosts_to_upgrade + serial: 1 + tasks: + - include: fedora_tasks.yml + when: ansible_distribution == 'Fedora' + +- name: Backup etcd + include: backup.yml + vars: + backup_tag: "post-3.0-" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml new file mode 100644 index 000000000..8e7dc9d9b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml @@ -0,0 +1,23 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd package but exclude etcd3 + command: "{{ ansible_pkg_mgr }} install -y etcd-{{ upgrade_version }}\\* --exclude etcd3" + when: upgrade_version | version_compare('3.0','<') + +- name: Update etcd package not excluding etcd3 + command: "{{ ansible_pkg_mgr }} install -y etcd3-{{ upgrade_version }}\\*" + when: not upgrade_version | version_compare('3.0','<') + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/roles b/playbooks/common/openshift-cluster/upgrades/etcd/roles new file mode 120000 index 000000000..6bc1a7aef --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/roles @@ -0,0 +1 @@ +../../../../../roles \ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml deleted file mode 100644 index 57b156b1c..000000000 --- a/playbooks/common/openshift-cluster/upgrades/etcd_backup.yml +++ /dev/null @@ -1,73 +0,0 @@ -- name: Backup etcd - hosts: etcd_hosts_to_backup - vars: - embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - roles: - - openshift_facts - tasks: - # Ensure we persist the etcd role for this host in openshift_facts - - openshift_facts: - role: etcd - local_facts: {} - when: "'etcd' not in openshift" - - - stat: path=/var/lib/openshift - register: var_lib_openshift - - - stat: path=/var/lib/origin - register: var_lib_origin - - - name: Create origin symlink if necessary - file: src=/var/lib/openshift/ dest=/var/lib/origin state=link - when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False - - # TODO: replace shell module with command and update later checks - # We assume to be using the data dir for all backups. - - name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 - register: avail_disk - - # TODO: replace shell module with command and update later checks - - name: Check current embedded etcd disk usage - shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: embedded_etcd | bool - - - name: Abort if insufficient disk space for etcd backup - fail: - msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) - - - name: Install etcd (for etcdctl) - action: "{{ ansible_pkg_mgr }} name=etcd state=present" - when: not openshift.common.is_atomic | bool - - - name: Generate etcd backup - command: > - etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} - - - set_fact: - etcd_backup_complete: True - - - name: Display location of etcd backup - debug: - msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" - -- name: Gate on etcd backup - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - etcd_backup_completed: "{{ hostvars - | oo_select_keys(groups.etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" - - set_fact: - etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" - when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index e1342f76b..339af3d0a 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -27,7 +27,7 @@ embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - name: Backup etcd - include: etcd_backup.yml + include: ./etcd/backup.yml - name: Upgrade master packages hosts: oo_masters_to_config -- cgit v1.2.3 From ce4034672fa8f2371b8bbf52ff04747583b4edda Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Thu, 10 Nov 2016 17:02:24 -0500 Subject: Add updates for containerized --- README_CONTAINERIZED_INSTALLATION.md | 10 +++--- .../upgrades/etcd/containerized_tasks.yml | 39 ++++++++++++++++++++++ .../upgrades/etcd/files/etcdctl.sh | 1 + .../openshift-cluster/upgrades/etcd/main.yml | 21 ++++++++---- roles/etcd/etcdctl.sh | 11 ++++++ roles/openshift_facts/library/openshift_facts.py | 6 ++-- 6 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml create mode 120000 playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh create mode 100644 roles/etcd/etcdctl.sh diff --git a/README_CONTAINERIZED_INSTALLATION.md b/README_CONTAINERIZED_INSTALLATION.md index c615154ef..0a0ebb836 100644 --- a/README_CONTAINERIZED_INSTALLATION.md +++ b/README_CONTAINERIZED_INSTALLATION.md @@ -31,7 +31,7 @@ native clients. The wrapper scripts mount a limited subset of paths, _~/.kube_, _/etc/origin/_, and _/tmp_. Be mindful of this when passing in files to be processed by `oc` or `oadm`. You may find it easier to redirect input like this : - + `oc create -f - < my_file.json` ## Technical Notes @@ -48,18 +48,18 @@ before attempting to pull any of the following images. openshift/origin openshift/node (node + openshift-sdn + openvswitch rpm for client tools) openshift/openvswitch (centos7 + openvswitch rpm, runs ovsdb ovsctl processes) - registry.access.redhat.com/rhel7/etcd + registry.access.redhat.com/rhel7/etcd3 OpenShift Enterprise openshift3/ose openshift3/node openshift3/openvswitch - registry.access.redhat.com/rhel7/etcd + registry.access.redhat.com/rhel7/etcd3 Atomic Enterprise Platform aep3/aep aep3/node aep3/openvswitch - registry.access.redhat.com/rhel7/etcd - + registry.access.redhat.com/rhel7/etcd3 + * note openshift3/* and aep3/* images come from registry.access.redhat.com and rely on the --additional-repository flag being set appropriately. diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml new file mode 100644 index 000000000..2cbe3b729 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -0,0 +1,39 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update systemd unit + openshift_facts: + role: etcd + +- name: Set etcd3 image name + set_fact: + new_etcd_image: "{{ openshift.etcd.etcd_image | regex_replace('/etcd$','/etcd3') }}" + +- name: Pull new etcd image + command: "docker pull {{ new_etcd_image }}" + +- name: Update to latest etcd image + replace: + dest: /etc/systemd/system/etcd_container.service + regexp: "{{ openshift.etcd.etcd_image }}$" + replace: "{{ new_etcd_image }}" + +- name: Update etcd package not excluding etcd3 + systemd: + name: etcd_container + daemon_reload: yes + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 + +- name: Store new etcd_image + openshift_facts: + role: etcd + local_facts: + - etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh new file mode 120000 index 000000000..641e04e44 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh @@ -0,0 +1 @@ +../roles/etcd/files/etcdctl.sh \ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml index e1c092dbe..22af9e7e2 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -34,8 +34,10 @@ hosts: etcd_hosts_to_upgrade tasks: - name: Record etcd version - command: rpm -q --qf '%{version}' etcd + command: rpm -qa --qf '%{version}' etcd\* register: etcd_installed_version + failed_when: false + when: not openshift.common.is_containerized | bool # I really dislike this copy/pasta but I wasn't able to find a way to get it to loop # through hosts, then loop through tasks only when appropriate @@ -46,7 +48,7 @@ upgrade_version: '2.1' tasks: - include: rhel_tasks.yml - when: etcd_installed_version.stdout | version_compare('2.1','<') and ansible_distribution == 'RedHat' + when: etcd_installed_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool - name: Upgrade to 2.2 hosts: etcd_hosts_to_upgrade @@ -55,7 +57,7 @@ upgrade_version: '2.2' tasks: - include: rhel_tasks.yml - when: etcd_installed_version.stdout | version_compare('2.2','<') and ansible_distribution == 'RedHat' + when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool - name: Upgrade to 2.3 hosts: etcd_hosts_to_upgrade @@ -64,7 +66,7 @@ upgrade_version: '2.3' tasks: - include: rhel_tasks.yml - when: etcd_installed_version.stdout | version_compare('2.3','<') and ansible_distribution == 'RedHat' + when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool - name: Upgrade to 3.0 hosts: etcd_hosts_to_upgrade @@ -73,14 +75,21 @@ upgrade_version: '3.0' tasks: - include: rhel_tasks.yml - when: etcd_installed_version.stdout | version_compare('3.0','<') and ansible_distribution == 'RedHat' + when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool - name: Upgrade fedora to latest hosts: etcd_hosts_to_upgrade serial: 1 tasks: - include: fedora_tasks.yml - when: ansible_distribution == 'Fedora' + when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to etcd3 image + hosts: etcd_hosts_to_upgrade + serial: 1 + tasks: + - include: containerized_tasks.yml + when: openshift.common.is_containerized | bool - name: Backup etcd include: backup.yml diff --git a/roles/etcd/etcdctl.sh b/roles/etcd/etcdctl.sh new file mode 100644 index 000000000..0e324a8a9 --- /dev/null +++ b/roles/etcd/etcdctl.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Sets up handy aliases for etcd, need etcdctl2 and etcdctl3 because +# command flags are different between the two. Should work on stand +# alone etcd hosts and master + etcd hosts too because we use the peer keys. +etcdctl2() { + /usr/bin/etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://`hostname`:2379 ${@} +} + +etcdctl3() { + ETCDCTL_API=3 /usr/bin/etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints https://`hostname`:2379 ${@} +} diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index a28b58e85..d797eb4d3 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -1595,7 +1595,7 @@ def set_container_facts_if_unset(facts): cli_image = master_image node_image = 'openshift3/node' ovs_image = 'openshift3/openvswitch' - etcd_image = 'registry.access.redhat.com/rhel7/etcd' + etcd_image = 'registry.access.redhat.com/rhel7/etcd3' pod_image = 'openshift3/ose-pod' router_image = 'openshift3/ose-haproxy-router' registry_image = 'openshift3/ose-docker-registry' @@ -1605,7 +1605,7 @@ def set_container_facts_if_unset(facts): cli_image = master_image node_image = 'aep3_beta/node' ovs_image = 'aep3_beta/openvswitch' - etcd_image = 'registry.access.redhat.com/rhel7/etcd' + etcd_image = 'registry.access.redhat.com/rhel7/etcd3' pod_image = 'aep3_beta/aep-pod' router_image = 'aep3_beta/aep-haproxy-router' registry_image = 'aep3_beta/aep-docker-registry' @@ -1615,7 +1615,7 @@ def set_container_facts_if_unset(facts): cli_image = master_image node_image = 'openshift/node' ovs_image = 'openshift/openvswitch' - etcd_image = 'registry.access.redhat.com/rhel7/etcd' + etcd_image = 'registry.access.redhat.com/rhel7/etcd3' pod_image = 'openshift/origin-pod' router_image = 'openshift/origin-haproxy-router' registry_image = 'openshift/origin-docker-registry' -- cgit v1.2.3 From 7dae2f94927b88ab96a6f447e1595a6f2ef88b9b Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 14 Nov 2016 14:18:37 -0500 Subject: Make etcd containerized upgrade stepwise --- .../upgrades/etcd/containerized_tasks.yml | 24 ++++++++---- .../openshift-cluster/upgrades/etcd/main.yml | 45 +++++++++++++++++----- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml index 2cbe3b729..9ace7db25 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -2,13 +2,14 @@ - name: Verify cluster is healthy pre-upgrade command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" -- name: Update systemd unit - openshift_facts: - role: etcd +- name: Get current image + shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}' + register: current_image -- name: Set etcd3 image name +- name: Set new_etcd_image set_fact: - new_etcd_image: "{{ openshift.etcd.etcd_image | regex_replace('/etcd$','/etcd3') }}" + new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd3:' ~ upgrade_version ) if upgrade_version | version_compare('3.0','>=') + else current_image.stdout.split(':')[0] ~ ':' ~ upgrade_version }}" - name: Pull new etcd image command: "docker pull {{ new_etcd_image }}" @@ -16,15 +17,22 @@ - name: Update to latest etcd image replace: dest: /etc/systemd/system/etcd_container.service - regexp: "{{ openshift.etcd.etcd_image }}$" + regexp: "{{ current_image.stdout }}$" replace: "{{ new_etcd_image }}" -- name: Update etcd package not excluding etcd3 +- name: Restart etcd_container systemd: name: etcd_container daemon_reload: yes state: restarted +## TODO: probably should just move this into the backup playbooks, also this +## will fail on atomic host. We need to revisit how to do etcd backups there as +## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) +- name: Upgrade etcdctl to 3.0 if necessary and able + action: "{{ ansible_pkg_mgr }} name=etcd ensure=latest" + when: openshift.common.is_containerized | bool and not openshift.common.is_atomic | bool and upgrade_version | version_compare('3.0','>=') + - name: Verify cluster is healthy command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" register: etcdctl @@ -36,4 +44,4 @@ openshift_facts: role: etcd local_facts: - - etcd_image: "{{ new_etcd_image }}" + etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml index 22af9e7e2..cce844403 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -33,11 +33,16 @@ - name: Determine etcd version hosts: etcd_hosts_to_upgrade tasks: - - name: Record etcd version + - name: Record RPM based etcd version command: rpm -qa --qf '%{version}' etcd\* register: etcd_installed_version failed_when: false when: not openshift.common.is_containerized | bool + - name: Record containerized etcd version + command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* + register: etcd_installed_version + failed_when: false + when: openshift.common.is_containerized | bool # I really dislike this copy/pasta but I wasn't able to find a way to get it to loop # through hosts, then loop through tasks only when appropriate @@ -50,7 +55,7 @@ - include: rhel_tasks.yml when: etcd_installed_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool -- name: Upgrade to 2.2 +- name: Upgrade RPM hosts to 2.2 hosts: etcd_hosts_to_upgrade serial: 1 vars: @@ -59,7 +64,16 @@ - include: rhel_tasks.yml when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool -- name: Upgrade to 2.3 +- name: Upgrade containerized hosts to 2.2.5 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.2.5 + tasks: + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.3 hosts: etcd_hosts_to_upgrade serial: 1 vars: @@ -68,7 +82,16 @@ - include: rhel_tasks.yml when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool -- name: Upgrade to 3.0 +- name: Upgrade containerized hosts to 2.3.7 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.3.7 + tasks: + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 3.0 hosts: etcd_hosts_to_upgrade serial: 1 vars: @@ -77,19 +100,21 @@ - include: rhel_tasks.yml when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool -- name: Upgrade fedora to latest +- name: Upgrade containerized hosts to etcd3 image hosts: etcd_hosts_to_upgrade serial: 1 + vars: + upgrade_version: 3.0.3 tasks: - - include: fedora_tasks.yml - when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool -- name: Upgrade containerized hosts to etcd3 image +- name: Upgrade fedora to latest hosts: etcd_hosts_to_upgrade serial: 1 tasks: - - include: containerized_tasks.yml - when: openshift.common.is_containerized | bool + - include: fedora_tasks.yml + when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool - name: Backup etcd include: backup.yml -- cgit v1.2.3 From 371c36eae16644bda921550371da713e63e44fac Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 14 Nov 2016 14:25:51 -0500 Subject: Actually upgrade host etcdctl no matter what --- .../common/openshift-cluster/upgrades/etcd/containerized_tasks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml index 9ace7db25..35f391f8c 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -29,9 +29,9 @@ ## TODO: probably should just move this into the backup playbooks, also this ## will fail on atomic host. We need to revisit how to do etcd backups there as ## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) -- name: Upgrade etcdctl to 3.0 if necessary and able +- name: Upgrade etcd for etcdctl when not atomic action: "{{ ansible_pkg_mgr }} name=etcd ensure=latest" - when: openshift.common.is_containerized | bool and not openshift.common.is_atomic | bool and upgrade_version | version_compare('3.0','>=') + when: not openshift.common.is_atomic | bool - name: Verify cluster is healthy command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" -- cgit v1.2.3