summary refs log tree commit diff stats
path: root/playbooks/common
diff options
context:
space:
mode:
author Brenton Leanhardt <bleanhar@redhat.com> 2016-01-13 10:07:35 -0500
committer Brenton Leanhardt <bleanhar@redhat.com> 2016-01-13 10:07:35 -0500
commit 965c614859a0318c956b4a9ab312c7c856facaa1 (patch)
tree 389a40f3d2a5cbbd1591a83ab210f149f0abc06d /playbooks/common
parent 607d45f426c7e86a256edc7fd442eb126867c243 (diff)
parent 97be5890e2a34036a22d2d1e2586c83009ae6064 (diff)
download openshift-965c614859a0318c956b4a9ab312c7c856facaa1.tar.gz
openshift-965c614859a0318c956b4a9ab312c7c856facaa1.tar.bz2
openshift-965c614859a0318c956b4a9ab312c7c856facaa1.tar.xz
openshift-965c614859a0318c956b4a9ab312c7c856facaa1.zip
Merge pull request #1121 from abutcher/rolling-restarts-pacemaker
Rolling restart playbook for masters
Diffstat (limited to 'playbooks/common')
-rw-r--r-- playbooks/common/openshift-master/restart.yml 141
-rw-r--r-- playbooks/common/openshift-master/restart_hosts.yml 39
-rw-r--r-- playbooks/common/openshift-master/restart_hosts_pacemaker.yml 25
-rw-r--r-- playbooks/common/openshift-master/restart_services.yml 27
-rw-r--r-- playbooks/common/openshift-master/restart_services_pacemaker.yml 10
5 files changed, 242 insertions, 0 deletions
diff --git a/playbooks/common/openshift-master/restart.yml b/playbooks/common/openshift-master/restart.yml
new file mode 100644
index 000000000..fa13a64cb
--- /dev/null
+++ b/playbooks/common/openshift-master/restart.yml
@@ -0,0 +1,141 @@
+---
+# Run the shared group-evaluation playbook before any play in this file.
+# NOTE(review): presumably this is what populates oo_masters_to_config,
+# which the plays below target - confirm against evaluate_groups.yml.
+- include: ../openshift-cluster/evaluate_groups.yml
+
+# Play: reject an unsupported restart mode, then store the restart mode and
+# the master cluster method as local facts. Later plays read them back as
+# openshift.common.rolling_restart_mode and openshift.master.cluster_method.
+- name: Validate configuration for rolling restart
+  hosts: oo_masters_to_config
+  roles:
+  - openshift_facts
+  tasks:
+  # An unset openshift_rolling_restart_mode is allowed (it defaults to
+  # 'services' below); any other value than the two listed aborts the run.
+  - fail:
+      msg: "openshift_rolling_restart_mode must be set to either 'services' or 'system'"
+    when:
+    - openshift_rolling_restart_mode is defined
+    - openshift_rolling_restart_mode not in ['services', 'system']
+  # One openshift_facts call per role/local_facts pair.
+  - openshift_facts:
+      role: "{{ item.role }}"
+      local_facts: "{{ item.local_facts }}"
+    with_items:
+    - role: common
+      local_facts:
+        rolling_restart_mode: "{{ openshift_rolling_restart_mode | default('services') }}"
+    - role: master
+      local_facts:
+        cluster_method: "{{ openshift_master_cluster_method | default(None) }}"
+
+# Drop a uniquely named temp file on the control machine. A later play
+# looks for the same path on every master that will be restarted: if it is
+# found there, ansible is running on one of those machines and the user
+# must be warned.
+- name: Create temp file on localhost
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  become: false
+  tasks:
+  # The generated path is read later through hostvars.localhost.mktemp.stdout.
+  - local_action: command mktemp
+    changed_when: false
+    register: mktemp
+
+# Stat the control machine's temp-file path on each master and keep the
+# result in `exists`.
+- name: Check if temp file exists on any masters
+  hosts: oo_masters_to_config
+  tasks:
+  - stat:
+      path: "{{ hostvars.localhost.mktemp.stdout }}"
+    changed_when: false
+    register: exists
+
+# Remove the marker file created by mktemp on the control machine.
+- name: Cleanup temp file on localhost
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  become: false
+  tasks:
+  - file:
+      path: "{{ hostvars.localhost.mktemp.stdout }}"
+      state: absent
+    changed_when: false
+
+# In 'system' mode only: pause for confirmation on a master where the marker
+# file was found, and remember that result as the `current_host` fact.
+- name: Warn if restarting the system where ansible is running
+  hosts: oo_masters_to_config
+  tasks:
+  - pause:
+      prompt: >
+        Warning: Running playbook from a host that will be restarted!
+        Press CTRL+C and A to abort playbook execution. You may
+        continue by pressing ENTER but the playbook will stop
+        executing once this system restarts and services must be
+        manually verified.
+    when:
+    - exists.stat.exists
+    - openshift.common.rolling_restart_mode == 'system'
+  # current_host is consulted later when building the oo_current_masters group.
+  - set_fact:
+      current_host: "{{ exists.stat.exists }}"
+    when: openshift.common.rolling_restart_mode == 'system'
+
+# Pacemaker clusters only: record on each master whether its master service
+# currently reports 'active', as the `is_active` fact.
+- name: Determine which masters are currently active
+  hosts: oo_masters_to_config
+  tasks:
+  # Failure is decided purely by the printed state: anything other than
+  # 'active' or 'inactive' fails the task.
+  - name: Check master service status
+    command: "systemctl is-active {{ openshift.common.service_type }}-master"
+    register: active_check_output
+    changed_when: false
+    failed_when: active_check_output.stdout not in ['active', 'inactive']
+    when: openshift.master.cluster_method == 'pacemaker'
+  - set_fact:
+      is_active: "{{ active_check_output.stdout == 'active' }}"
+    when: openshift.master.cluster_method == 'pacemaker'
+
+# Build two in-memory inventory groups from the facts gathered above:
+#   oo_active_masters  - masters whose is_active fact is true
+#   oo_current_masters - masters whose current_host fact is true
+# A host lacking the fact is treated as false and left out of the group.
+- name: Evaluate master groups
+  hosts: localhost
+  become: false
+  tasks:
+  - name: Evaluate oo_active_masters
+    add_host:
+      name: "{{ item }}"
+      groups: oo_active_masters
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_sudo: "{{ g_sudo | default(omit) }}"
+    when: hostvars[item]['is_active'] | default(false) | bool
+    with_items: "{{ groups.oo_masters_to_config | default([]) }}"
+  - name: Evaluate oo_current_masters
+    add_host:
+      name: "{{ item }}"
+      groups: oo_current_masters
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_sudo: "{{ g_sudo | default(omit) }}"
+    when: hostvars[item]['current_host'] | default(false) | bool
+    with_items: "{{ groups.oo_masters_to_config | default([]) }}"
+
+# On the active masters, capture `pcs status` and abort unless the
+# validate_pcs_cluster filter accepts it for all of oo_masters_to_config.
+# NOTE(review): validate_pcs_cluster is a project filter not shown here;
+# the failure message suggests it checks that the listed nodes are online.
+- name: Validate pacemaker cluster
+  hosts: oo_active_masters
+  tasks:
+  - name: Retrieve pcs status
+    command: pcs status
+    changed_when: false
+    register: pcs_status_output
+  - fail:
+      msg: >
+        Pacemaker cluster validation failed. One or more nodes are not online.
+    when: not (pcs_status_output.stdout | validate_pcs_cluster(groups.oo_masters_to_config)) | bool
+
+# Restart, one host at a time, every master that is neither an active master
+# nor the host ansible is running on. The mode fact selects whether the whole
+# system or only the services are restarted.
+- name: Restart masters
+  hosts: 'oo_masters_to_config:!oo_active_masters:!oo_current_masters'
+  serial: 1
+  vars:
+    # True when more than one master is configured; read by restart_services.yml.
+    openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
+  tasks:
+  - include: restart_hosts.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services.yml
+    when: openshift.common.rolling_restart_mode == 'services'
+
+# Restart the active masters one at a time, using the pacemaker-specific
+# task files for the selected restart mode.
+- name: Restart active masters
+  serial: 1
+  hosts: oo_active_masters
+  tasks:
+  - include: restart_hosts_pacemaker.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services_pacemaker.yml
+    when: openshift.common.rolling_restart_mode == 'services'
+
+# Last of all, restart the masters in oo_current_masters (the hosts where
+# the control machine's marker file was found), one at a time.
+- name: Restart current masters
+  serial: 1
+  hosts: oo_current_masters
+  tasks:
+  - include: restart_hosts.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services.yml
+    when: openshift.common.rolling_restart_mode == 'services'
diff --git a/playbooks/common/openshift-master/restart_hosts.yml b/playbooks/common/openshift-master/restart_hosts.yml
new file mode 100644
index 000000000..ff206f5a2
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_hosts.yml
@@ -0,0 +1,39 @@
+# Reboot the host. The command is launched asynchronously without polling
+# and errors are ignored; see the linked issue for the background.
+# https://github.com/ansible/ansible/issues/10616
+- name: Restart master system
+  become: true
+  shell: 'sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart"'
+  async: 1
+  poll: 0
+  ignore_errors: true
+# Outside pacemaker clusters the API port of the restarted host itself can
+# be probed, so wait for it directly from the control machine.
+- name: Wait for master API to come back online
+  become: false
+  local_action:
+    module: wait_for
+    host: "{{ inventory_hostname }}"
+    port: "{{ openshift.master.api_port }}"
+    state: started
+    delay: 10
+  when: openshift.master.cluster_method != 'pacemaker'
+# Pacemaker clusters: wait, from the control machine, until port 22 on the
+# rebooted host accepts connections again.
+- name: Wait for master to start
+  become: false
+  local_action:
+    module: wait_for
+    host: "{{ inventory_hostname }}"
+    port: 22
+    state: started
+    delay: 10
+  when: openshift.master.cluster_method == 'pacemaker'
+# Pacemaker clusters: poll `pcs status` (up to 15 tries, 2 seconds apart)
+# until the validate_pcs_cluster filter accepts this host.
+- name: Wait for master to become available
+  command: pcs status
+  register: pcs_status_output
+  changed_when: false
+  until: pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname]) | bool
+  delay: 2
+  retries: 15
+  when: openshift.master.cluster_method == 'pacemaker'
+# Pacemaker clusters: abort if the last captured `pcs status` output still
+# does not validate for this host.
+# The message must reference `inventory_hostname` (with an underscore);
+# `inventory hostname` is not a valid Jinja2 expression and would make the
+# message fail to render.
+- fail:
+    msg: >
+      Pacemaker cluster validation failed {{ inventory_hostname }} is not online.
+  when: openshift.master.cluster_method == 'pacemaker' and not (pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname])) | bool
diff --git a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml
new file mode 100644
index 000000000..c9219e8de
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml
@@ -0,0 +1,25 @@
+# Move the pacemaker `master` resource to the first host in
+# oo_masters_to_config whose is_active fact is 'False'.
+# NOTE(review): oo_select_keys / oo_collect are project filters not shown
+# here; the description above is inferred from their arguments - confirm.
+- name: Fail over master resource
+  command: >
+    pcs resource move master
+    {{ hostvars | oo_select_keys(groups['oo_masters_to_config'])
+    | oo_collect('openshift.common.hostname', {'is_active': 'False'})
+    | list | first }}
+# After the fail-over, wait from the control machine until the API port on
+# the cluster hostname accepts connections.
+- name: Wait for master API to come back online
+  become: false
+  local_action:
+    module: wait_for
+    host: "{{ openshift.master.cluster_hostname }}"
+    port: "{{ openshift.master.api_port }}"
+    state: started
+    delay: 10
+# Reboot the host. The command is launched asynchronously without polling
+# and errors are ignored; see the linked issue for the background.
+# https://github.com/ansible/ansible/issues/10616
+- name: Restart master system
+  become: true
+  shell: 'sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart"'
+  async: 1
+  poll: 0
+  ignore_errors: true
+# Wait, from the control machine, until the rebooted host accepts SSH
+# connections again. Port 22 is probed explicitly, matching the equivalent
+# task in restart_hosts.yml: with no port (or path) condition wait_for has
+# nothing to test and merely sleeps instead of detecting that the host is
+# back.
+- name: Wait for master to start
+  become: false
+  local_action:
+    module: wait_for
+    host: "{{ inventory_hostname }}"
+    port: 22
+    state: started
+    delay: 10
diff --git a/playbooks/common/openshift-master/restart_services.yml b/playbooks/common/openshift-master/restart_services.yml
new file mode 100644
index 000000000..5e539cd65
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_services.yml
@@ -0,0 +1,27 @@
+# Non-HA installs: restart the single combined master service.
+- name: Restart master
+  when: not openshift_master_ha | bool
+  service:
+    name: "{{ openshift.common.service_type }}-master"
+    state: restarted
+# HA installs that are not pacemaker-managed: restart the master API service.
+- name: Restart master API
+  when:
+  - openshift_master_ha | bool
+  - openshift.master.cluster_method != 'pacemaker'
+  service:
+    name: "{{ openshift.common.service_type }}-master-api"
+    state: restarted
+# HA, non-pacemaker: wait from the control machine until this host's API
+# port accepts connections again.
+- name: Wait for master API to come back online
+  become: false
+  when:
+  - openshift_master_ha | bool
+  - openshift.master.cluster_method != 'pacemaker'
+  local_action:
+    module: wait_for
+    host: "{{ inventory_hostname }}"
+    port: "{{ openshift.master.api_port }}"
+    state: started
+    delay: 10
+# HA, non-pacemaker: restart the master controllers service.
+- name: Restart master controllers
+  when:
+  - openshift_master_ha | bool
+  - openshift.master.cluster_method != 'pacemaker'
+  service:
+    name: "{{ openshift.common.service_type }}-master-controllers"
+    state: restarted
+  # Errors are ignored because pre-3.1.1 installations may have a service
+  # type other than simple.
+  ignore_errors: true
diff --git a/playbooks/common/openshift-master/restart_services_pacemaker.yml b/playbooks/common/openshift-master/restart_services_pacemaker.yml
new file mode 100644
index 000000000..e738f3fb6
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_services_pacemaker.yml
@@ -0,0 +1,10 @@
+# Restart the pacemaker `master` resource through pcs.
+- name: Restart master services
+  command: "pcs resource restart master"
+# Wait from the control machine until the API port on the cluster hostname
+# accepts connections.
+- name: Wait for master API to come back online
+  become: false
+  local_action:
+    module: wait_for
+    host: "{{ openshift.master.cluster_hostname }}"
+    port: "{{ openshift.master.api_port }}"
+    state: started
+    delay: 10