From a6d5c625956a5051b7bbd9fc48430b9df11084ee Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Fri, 8 Dec 2017 18:30:09 -0500 Subject: Refactor node upgrade to include less serial tasks This commit moves the pulling of images, packages, and updating config files into a non-serialized play. The serialized play is now in charge of marking unschedulable, draining, stopping and restarting services, and marking schedulable. If rpm install / container download takes 60s per host, this will save 3 hours and 10 minutes at 200 hosts per cluster and forks of 20 hosts. --- .../openshift-cluster/upgrades/upgrade_nodes.yml | 33 ++++-- roles/openshift_node/handlers/main.yml | 6 + roles/openshift_node/tasks/dnsmasq.yml | 43 ------- roles/openshift_node/tasks/dnsmasq_install.yml | 43 +++++++ roles/openshift_node/tasks/docker/upgrade.yml | 27 ----- roles/openshift_node/tasks/main.yml | 1 + roles/openshift_node/tasks/upgrade.yml | 127 +++------------------ .../tasks/upgrade/containerized_node_upgrade.yml | 11 -- roles/openshift_node/tasks/upgrade/restart.yml | 9 ++ roles/openshift_node/tasks/upgrade_pre.yml | 118 +++++++++++++++++++ 10 files changed, 218 insertions(+), 200 deletions(-) create mode 100644 roles/openshift_node/tasks/dnsmasq_install.yml delete mode 100644 roles/openshift_node/tasks/docker/upgrade.yml create mode 100644 roles/openshift_node/tasks/upgrade_pre.yml diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index f7a85545b..a3cb1d0f9 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -1,16 +1,25 @@ --- +- name: Prepull images and rpms before doing rolling restart + hosts: oo_nodes_to_upgrade:!oo_masters_to_config + roles: + - role: openshift_facts + tasks: + - include_role: + name: openshift_node + tasks_from: upgrade_pre.yml + vars: + openshift_node_upgrade_in_progress: True + - 
name: Drain and upgrade nodes hosts: oo_nodes_to_upgrade:!oo_masters_to_config # This var must be set with -e on invocation, as it is not a per-host inventory var # and is evaluated early. Values such as "20%" can also be used. serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" - + roles: + - lib_openshift + - openshift_facts pre_tasks: - - name: Load lib_openshift modules - import_role: - name: lib_openshift - # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node # or docker actually needs an upgrade before proceeding. Perhaps best to save this until # we merge upgrade functionality into the base roles and a normal config.yml playbook run. @@ -33,18 +42,12 @@ retries: 60 delay: 60 - roles: - - openshift_facts post_tasks: - include_role: name: openshift_node tasks_from: upgrade.yml vars: openshift_node_upgrade_in_progress: True - - include_role: - name: openshift_excluder - vars: - r_openshift_excluder_action: enable - name: Set node schedulability oc_adm_manage_node: node: "{{ openshift.node.nodename | lower }}" @@ -55,3 +58,11 @@ register: node_schedulable until: node_schedulable|succeeded when: node_unschedulable|changed + +- name: Re-enable excluders + hosts: oo_nodes_to_upgrade:!oo_masters_to_config + tasks: + - include_role: + name: openshift_excluder + vars: + r_openshift_excluder_action: enable diff --git a/roles/openshift_node/handlers/main.yml b/roles/openshift_node/handlers/main.yml index 170a3dc6e..1d9797f84 100644 --- a/roles/openshift_node/handlers/main.yml +++ b/roles/openshift_node/handlers/main.yml @@ -4,11 +4,15 @@ name: NetworkManager state: restarted enabled: True + when: + - (not skip_node_svc_handlers | default(False) | bool) - name: restart dnsmasq systemd: name: dnsmasq state: restarted + when: + - (not skip_node_svc_handlers | default(False) | bool) - name: restart openvswitch systemd: @@ -47,3 +51,5 @@ - name: 
reload systemd units command: systemctl daemon-reload + when: + - (not skip_node_svc_handlers | default(False) | bool) diff --git a/roles/openshift_node/tasks/dnsmasq.yml b/roles/openshift_node/tasks/dnsmasq.yml index f210a3a21..31ca46ec0 100644 --- a/roles/openshift_node/tasks/dnsmasq.yml +++ b/roles/openshift_node/tasks/dnsmasq.yml @@ -1,43 +1,4 @@ --- -- name: Check for NetworkManager service - command: > - systemctl show NetworkManager - register: nm_show - changed_when: false - ignore_errors: True - -- name: Set fact using_network_manager - set_fact: - network_manager_active: "{{ True if 'ActiveState=active' in nm_show.stdout else False }}" - -- name: Install dnsmasq - package: name=dnsmasq state=installed - when: not openshift.common.is_atomic | bool - register: result - until: result | success - -- name: ensure origin/node directory exists - file: - state: directory - path: "{{ item }}" - owner: root - group: root - mode: '0700' - with_items: - - /etc/origin - - /etc/origin/node - -# this file is copied to /etc/dnsmasq.d/ when the node starts and is removed -# when the node stops. A dbus-message is sent to dnsmasq to add the same entries -# so that dnsmasq doesn't need to be restarted. Once we can use dnsmasq 2.77 or -# newer we can use --server-file option to update the servers dynamically and -# reload them by sending dnsmasq a SIGHUP. We write the file in case someone else -# triggers a restart of dnsmasq but not a node restart. 
-- name: Install node-dnsmasq.conf - template: - src: node-dnsmasq.conf.j2 - dest: /etc/origin/node/node-dnsmasq.conf - - name: Install dnsmasq configuration template: src: origin-dns.conf.j2 @@ -63,7 +24,3 @@ # Dynamic NetworkManager based dispatcher - include_tasks: dnsmasq/network-manager.yml when: network_manager_active | bool - -# Relies on ansible in order to configure static config -- include_tasks: dnsmasq/no-network-manager.yml - when: not network_manager_active | bool diff --git a/roles/openshift_node/tasks/dnsmasq_install.yml b/roles/openshift_node/tasks/dnsmasq_install.yml new file mode 100644 index 000000000..9f66bf12d --- /dev/null +++ b/roles/openshift_node/tasks/dnsmasq_install.yml @@ -0,0 +1,43 @@ +--- +- name: Check for NetworkManager service + command: > + systemctl show NetworkManager + register: nm_show + changed_when: false + ignore_errors: True + +- name: Set fact using_network_manager + set_fact: + network_manager_active: "{{ True if 'ActiveState=active' in nm_show.stdout else False }}" + +- name: Install dnsmasq + package: name=dnsmasq state=installed + when: not openshift.common.is_atomic | bool + register: result + until: result | success + +- name: ensure origin/node directory exists + file: + state: directory + path: "{{ item }}" + owner: root + group: root + mode: '0700' + with_items: + - /etc/origin + - /etc/origin/node + +# this file is copied to /etc/dnsmasq.d/ when the node starts and is removed +# when the node stops. A dbus-message is sent to dnsmasq to add the same entries +# so that dnsmasq doesn't need to be restarted. Once we can use dnsmasq 2.77 or +# newer we can use --server-file option to update the servers dynamically and +# reload them by sending dnsmasq a SIGHUP. We write the file in case someone else +# triggers a restart of dnsmasq but not a node restart. 
+- name: Install node-dnsmasq.conf + template: + src: node-dnsmasq.conf.j2 + dest: /etc/origin/node/node-dnsmasq.conf + +# Relies on ansible in order to configure static config +- include_tasks: dnsmasq/no-network-manager.yml + when: not network_manager_active | bool diff --git a/roles/openshift_node/tasks/docker/upgrade.yml b/roles/openshift_node/tasks/docker/upgrade.yml deleted file mode 100644 index bbe9c71f5..000000000 --- a/roles/openshift_node/tasks/docker/upgrade.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -# input variables: -# - openshift_service_type -# - openshift.common.is_containerized -# - docker_version -# - skip_docker_restart - -- name: Check Docker image count - shell: "docker images -aq | wc -l" - register: docker_image_count - -- debug: var=docker_image_count.stdout - -- service: - name: docker - state: stopped - register: l_openshift_node_upgrade_docker_stop_result - until: not l_openshift_node_upgrade_docker_stop_result | failed - retries: 3 - delay: 30 - -- name: Upgrade Docker - package: name=docker{{ '-' + docker_version }} state=present - register: result - until: result | success - -# starting docker happens back in ../main.yml where it calls ../restart.yml diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 32c5f495f..946deb4d3 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -6,6 +6,7 @@ - deployment_type == 'openshift-enterprise' - not openshift_use_crio +- include_tasks: dnsmasq_install.yml - include_tasks: dnsmasq.yml - name: setup firewall diff --git a/roles/openshift_node/tasks/upgrade.yml b/roles/openshift_node/tasks/upgrade.yml index 9f333645a..ede73f22c 100644 --- a/roles/openshift_node/tasks/upgrade.yml +++ b/roles/openshift_node/tasks/upgrade.yml @@ -10,8 +10,6 @@ # tasks file for openshift_node_upgrade -- include_tasks: registry_auth.yml - - name: Stop node and openvswitch services service: name: "{{ item }}" @@ -21,58 +19,6 @@ - openvswitch 
failed_when: false -- name: Stop additional containerized services - service: - name: "{{ item }}" - state: stopped - with_items: - - "{{ openshift_service_type }}-master-controllers" - - "{{ openshift_service_type }}-master-api" - - etcd_container - failed_when: false - when: openshift.common.is_containerized | bool - -- name: Pre-pull node image - command: > - docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - when: openshift.common.is_containerized | bool - -- name: Pre-pull openvswitch image - command: > - docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - when: - - openshift.common.is_containerized | bool - - openshift_use_openshift_sdn | bool - -- include_tasks: docker/upgrade.yml - vars: - # We will restart Docker ourselves after everything is ready: - skip_docker_restart: True - when: - - l_docker_upgrade is defined - - l_docker_upgrade | bool - -- include_tasks: "{{ node_config_hook }}" - when: node_config_hook is defined - -- include_tasks: upgrade/rpm_upgrade.yml - vars: - component: "node" - openshift_version: "{{ openshift_pkg_version | default('') }}" - when: not openshift.common.is_containerized | bool - -- name: Remove obsolete docker-sdn-ovs.conf - file: - path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf" - state: absent - -- include_tasks: upgrade/containerized_node_upgrade.yml - when: openshift.common.is_containerized | bool - - name: Ensure containerized services stopped before Docker restart service: name: "{{ item }}" @@ -86,6 +32,17 @@ failed_when: false when: openshift.common.is_containerized | bool +- service: + name: docker + state: stopped + register: l_openshift_node_upgrade_docker_stop_result + until: not l_openshift_node_upgrade_docker_stop_result | failed + retries: 3 + delay: 30 + when: + - 
l_docker_upgrade is defined + - l_docker_upgrade | bool + - name: Stop rpm based services service: name: "{{ item }}" @@ -96,56 +53,19 @@ failed_when: false when: not openshift.common.is_containerized | bool +- include_tasks: "{{ node_config_hook }}" + when: node_config_hook is defined + # https://bugzilla.redhat.com/show_bug.cgi?id=1513054 - name: Clean up dockershim data file: path: "/var/lib/dockershim/sandbox/" state: absent -- name: Upgrade openvswitch - package: - name: openvswitch - state: latest - when: not openshift.common.is_containerized | bool - register: result - until: result | success - -- name: Update oreg value - yedit: - src: "{{ openshift.common.config_base }}/node/node-config.yaml" - key: 'imageConfig.format' - value: "{{ oreg_url | default(oreg_url_node) }}" - when: oreg_url is defined or oreg_url_node is defined - -# https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory -- name: Check for swap usage - command: grep "^[^#].*swap" /etc/fstab - # grep: match any lines which don't begin with '#' and contain 'swap' - changed_when: false - failed_when: false - register: swap_result - - # Disable Swap Block -- block: - - - name: Disable swap - command: swapoff --all - - - name: Remove swap entries from /etc/fstab - replace: - dest: /etc/fstab - regexp: '(^[^#].*swap.*)' - replace: '# \1' - backup: yes - - - name: Add notice about disabling swap - lineinfile: - dest: /etc/fstab - line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines' - state: present - +- name: Disable swap + command: swapoff --all when: - - swap_result.stdout_lines | length > 0 + - openshift_node_upgrade_swap_result | default(False) | bool - openshift_disable_swap | default(true) | bool # End Disable Swap Block @@ -155,17 +75,6 @@ - ansible_selinux is defined - ansible_selinux.status == 'enabled' -- name: Apply 3.6 dns config changes - yedit: - src: /etc/origin/node/node-config.yaml - key: "{{ item.key }}" - value: 
"{{ item.value }}" - with_items: - - key: "dnsBindAddress" - value: "127.0.0.1:53" - - key: "dnsRecursiveResolvConf" - value: "/etc/origin/node/resolv.conf" - # Restart all services - include_tasks: upgrade/restart.yml @@ -182,3 +91,5 @@ delay: 5 - include_tasks: dnsmasq.yml + +- meta: flush_handlers diff --git a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml b/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml index 245de60a7..8e547351b 100644 --- a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml +++ b/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml @@ -1,14 +1,3 @@ --- -# This is a hack to allow us to use systemd_units.yml, but skip the handlers which -# restart services. We will unconditionally restart all containerized services -# because we have to unconditionally restart Docker: -- set_fact: - skip_node_svc_handlers: True - - name: Update systemd units include_tasks: ../systemd_units.yml - -# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of -# play when the node has already been marked schedulable again. 
(this would look strange -# in logs otherwise) -- meta: flush_handlers diff --git a/roles/openshift_node/tasks/upgrade/restart.yml b/roles/openshift_node/tasks/upgrade/restart.yml index 65c301783..717cfa712 100644 --- a/roles/openshift_node/tasks/upgrade/restart.yml +++ b/roles/openshift_node/tasks/upgrade/restart.yml @@ -13,6 +13,15 @@ - name: Reload systemd to ensure latest unit files command: systemctl daemon-reload +- name: Restart support services + service: + name: "{{ item }}" + state: restarted + enabled: True + with_items: + - NetworkManager + - dnsmasq + - name: Restart container runtime service: name: "{{ openshift_docker_service_name }}" diff --git a/roles/openshift_node/tasks/upgrade_pre.yml b/roles/openshift_node/tasks/upgrade_pre.yml new file mode 100644 index 000000000..5d7961a24 --- /dev/null +++ b/roles/openshift_node/tasks/upgrade_pre.yml @@ -0,0 +1,118 @@ +--- +# This is a hack to allow us to update various components without restarting +# services. This will persist into the upgrade play as well, so everything +# needs to be restarted by hand. 
+- set_fact: + skip_node_svc_handlers: True + +- include_tasks: registry_auth.yml + +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: + - l_docker_upgrade is defined + - l_docker_upgrade | bool + +- debug: var=docker_image_count.stdout + when: + - l_docker_upgrade is defined + - l_docker_upgrade | bool + +- name: Upgrade Docker + package: name=docker{{ '-' + docker_version }} state=present + register: result + until: result | success + when: + - l_docker_upgrade is defined + - l_docker_upgrade | bool + +- name: Pre-pull node image + command: > + docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: openshift.common.is_containerized | bool + +- name: Pre-pull openvswitch image + command: > + docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: + - openshift.common.is_containerized | bool + - openshift_use_openshift_sdn | bool + +- include_tasks: upgrade/rpm_upgrade.yml + vars: + component: "node" + openshift_version: "{{ openshift_pkg_version | default('') }}" + when: not openshift.common.is_containerized | bool + +- name: Remove obsolete docker-sdn-ovs.conf + file: + path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf" + state: absent + +- include_tasks: upgrade/containerized_node_upgrade.yml + when: openshift.common.is_containerized | bool + +- name: Upgrade openvswitch + package: + name: openvswitch + state: latest + when: not openshift.common.is_containerized | bool + register: result + until: result | success + +- name: Update oreg value + yedit: + src: "{{ openshift.common.config_base }}/node/node-config.yaml" + key: 'imageConfig.format' + value: "{{ oreg_url | default(oreg_url_node) }}" + when: oreg_url is defined or oreg_url_node is defined + +# 
https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory +- name: Check for swap usage + command: grep "^[^#].*swap" /etc/fstab + # grep: match any lines which don't begin with '#' and contain 'swap' + changed_when: false + failed_when: false + register: swap_result + +# Set this fact here so we can use it during the next play, which is serial. +- name: set_fact swap_result + set_fact: + openshift_node_upgrade_swap_result: "{{ swap_result.stdout_lines | length > 0 | bool }}" + +# Disable Swap Block (pre) +- block: + - name: Remove swap entries from /etc/fstab + replace: + dest: /etc/fstab + regexp: '(^[^#].*swap.*)' + replace: '# \1' + backup: yes + + - name: Add notice about disabling swap + lineinfile: + dest: /etc/fstab + line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines' + state: present + when: + - openshift_node_upgrade_swap_result | default(False) | bool + - openshift_disable_swap | default(true) | bool + # End Disable Swap Block + +- name: Apply 3.6 dns config changes + yedit: + src: /etc/origin/node/node-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: "dnsBindAddress" + value: "127.0.0.1:53" + - key: "dnsRecursiveResolvConf" + value: "/etc/origin/node/resolv.conf" + +- include_tasks: dnsmasq_install.yml -- cgit v1.2.3 From 504930b1915c2b6329c1f68f23b1188beaabcbe9 Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Mon, 11 Dec 2017 16:47:44 -0500 Subject: upgrade node mark 2 --- roles/openshift_node/tasks/config.yml | 4 + roles/openshift_node/tasks/container_images.yml | 20 +++++ roles/openshift_node/tasks/systemd_units.yml | 23 +----- roles/openshift_node/tasks/upgrade.yml | 70 ++++-------------- .../tasks/upgrade/config_changes.yml | 77 +++++++++++++++++++ .../tasks/upgrade/containerized_node_upgrade.yml | 3 - .../tasks/upgrade/containerized_upgrade_pull.yml | 15 ++++ roles/openshift_node/tasks/upgrade/rpm_upgrade.yml | 33 +++------ 
.../tasks/upgrade/rpm_upgrade_install.yml | 19 +++++ .../openshift_node/tasks/upgrade/stop_services.yml | 43 +++++++++++ roles/openshift_node/tasks/upgrade_pre.yml | 86 +++------------------- 11 files changed, 219 insertions(+), 174 deletions(-) create mode 100644 roles/openshift_node/tasks/container_images.yml create mode 100644 roles/openshift_node/tasks/upgrade/config_changes.yml delete mode 100644 roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml create mode 100644 roles/openshift_node/tasks/upgrade/containerized_upgrade_pull.yml create mode 100644 roles/openshift_node/tasks/upgrade/rpm_upgrade_install.yml create mode 100644 roles/openshift_node/tasks/upgrade/stop_services.yml diff --git a/roles/openshift_node/tasks/config.yml b/roles/openshift_node/tasks/config.yml index e5c80bd09..5d66de0a3 100644 --- a/roles/openshift_node/tasks/config.yml +++ b/roles/openshift_node/tasks/config.yml @@ -2,6 +2,10 @@ - name: Install the systemd units include_tasks: systemd_units.yml +- name: Pull container images + include_tasks: container_images.yml + when: openshift.common.is_containerized | bool + - name: Start and enable openvswitch service systemd: name: openvswitch.service diff --git a/roles/openshift_node/tasks/container_images.yml b/roles/openshift_node/tasks/container_images.yml new file mode 100644 index 000000000..0b8c806ae --- /dev/null +++ b/roles/openshift_node/tasks/container_images.yml @@ -0,0 +1,20 @@ +--- +- name: Install Node system container + include_tasks: node_system_container.yml + when: + - l_is_node_system_container | bool + +- name: Install OpenvSwitch system containers + include_tasks: openvswitch_system_container.yml + when: + - openshift_node_use_openshift_sdn | bool + - l_is_openvswitch_system_container | bool + +- name: Pre-pull openvswitch image + command: > + docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: + - 
openshift_node_use_openshift_sdn | bool + - not l_is_openvswitch_system_container | bool diff --git a/roles/openshift_node/tasks/systemd_units.yml b/roles/openshift_node/tasks/systemd_units.yml index c532147b1..262ee698b 100644 --- a/roles/openshift_node/tasks/systemd_units.yml +++ b/roles/openshift_node/tasks/systemd_units.yml @@ -16,29 +16,10 @@ - name: include ovs service environment file include_tasks: config/install-ovs-service-env-file.yml - - name: Install Node system container - include_tasks: node_system_container.yml - when: - - l_is_node_system_container | bool - - - name: Install OpenvSwitch system containers - include_tasks: openvswitch_system_container.yml + - include_tasks: config/install-ovs-docker-service-file.yml when: - openshift_node_use_openshift_sdn | bool - - l_is_openvswitch_system_container | bool - -- block: - - name: Pre-pull openvswitch image - command: > - docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - - - include_tasks: config/install-ovs-docker-service-file.yml - when: - - openshift.common.is_containerized | bool - - openshift_node_use_openshift_sdn | bool - - not l_is_openvswitch_system_container | bool + - not l_is_openvswitch_system_container | bool - include_tasks: config/configure-node-settings.yml - include_tasks: config/configure-proxy-settings.yml diff --git a/roles/openshift_node/tasks/upgrade.yml b/roles/openshift_node/tasks/upgrade.yml index ede73f22c..87556533a 100644 --- a/roles/openshift_node/tasks/upgrade.yml +++ b/roles/openshift_node/tasks/upgrade.yml @@ -10,70 +10,29 @@ # tasks file for openshift_node_upgrade -- name: Stop node and openvswitch services - service: - name: "{{ item }}" - state: stopped - with_items: - - "{{ openshift_service_type }}-node" - - openvswitch - failed_when: false - -- name: Ensure containerized services stopped before Docker restart - service: - name: "{{ item }}" - state: 
stopped - with_items: - - etcd_container - - openvswitch - - "{{ openshift_service_type }}-master-api" - - "{{ openshift_service_type }}-master-controllers" - - "{{ openshift_service_type }}-node" - failed_when: false - when: openshift.common.is_containerized | bool - -- service: - name: docker - state: stopped - register: l_openshift_node_upgrade_docker_stop_result - until: not l_openshift_node_upgrade_docker_stop_result | failed - retries: 3 - delay: 30 +- name: stop services for upgrade + include_tasks: upgrade/stop_services.yml + +# Ensure we actually install the latest package. +- name: download docker upgrade rpm + command: "{{ ansible_pkg_mgr }} install -C -y docker{{ '-' + docker_version }}" + register: result + until: result | success when: - l_docker_upgrade is defined - l_docker_upgrade | bool -- name: Stop rpm based services - service: - name: "{{ item }}" - state: stopped - with_items: - - "{{ openshift_service_type }}-node" - - openvswitch - failed_when: false +- name: install pre-pulled rpms. 
+ include_tasks: upgrade/rpm_upgrade_install.yml + vars: + openshift_version: "{{ openshift_pkg_version | default('') }}" when: not openshift.common.is_containerized | bool + - include_tasks: "{{ node_config_hook }}" when: node_config_hook is defined -# https://bugzilla.redhat.com/show_bug.cgi?id=1513054 -- name: Clean up dockershim data - file: - path: "/var/lib/dockershim/sandbox/" - state: absent - -- name: Disable swap - command: swapoff --all - when: - - openshift_node_upgrade_swap_result | default(False) | bool - - openshift_disable_swap | default(true) | bool - # End Disable Swap Block - -- name: Reset selinux context - command: restorecon -RF {{ openshift_node_data_dir }}/openshift.local.volumes - when: - - ansible_selinux is defined - - ansible_selinux.status == 'enabled' +- include_tasks: upgrade/config_changes.yml # Restart all services - include_tasks: upgrade/restart.yml @@ -90,6 +49,7 @@ retries: 24 delay: 5 +- include_tasks: dnsmasq_install.yml - include_tasks: dnsmasq.yml - meta: flush_handlers diff --git a/roles/openshift_node/tasks/upgrade/config_changes.yml b/roles/openshift_node/tasks/upgrade/config_changes.yml new file mode 100644 index 000000000..e22018e6d --- /dev/null +++ b/roles/openshift_node/tasks/upgrade/config_changes.yml @@ -0,0 +1,77 @@ +--- +- name: Update systemd units + include_tasks: ../systemd_units.yml + when: openshift.common.is_containerized + +- name: Update oreg value + yedit: + src: "{{ openshift.common.config_base }}/node/node-config.yaml" + key: 'imageConfig.format' + value: "{{ oreg_url | default(oreg_url_node) }}" + when: oreg_url is defined or oreg_url_node is defined + +- name: Remove obsolete docker-sdn-ovs.conf + file: + path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf" + state: absent + +# https://bugzilla.redhat.com/show_bug.cgi?id=1513054 +- name: Clean up dockershim data + file: + path: "/var/lib/dockershim/sandbox/" + state: absent + +# Disable Swap Block (pre) +- block: + - name: Remove swap 
entries from /etc/fstab + replace: + dest: /etc/fstab + regexp: '(^[^#].*swap.*)' + replace: '# \1' + backup: yes + + - name: Add notice about disabling swap + lineinfile: + dest: /etc/fstab + line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines' + state: present + + - name: Disable swap + command: swapoff --all + + when: + - openshift_node_upgrade_swap_result | default(False) | bool + - openshift_disable_swap | default(true) | bool +# End Disable Swap Block + +- name: Apply 3.6 dns config changes + yedit: + src: /etc/origin/node/node-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: "dnsBindAddress" + value: "127.0.0.1:53" + - key: "dnsRecursiveResolvConf" + value: "/etc/origin/node/resolv.conf" + +- name: Install Node service file + template: + dest: "/etc/systemd/system/{{ openshift_service_type }}-node.service" + src: "node.service.j2" + register: l_node_unit + +- name: Reset selinux context + command: restorecon -RF {{ openshift_node_data_dir }}/openshift.local.volumes + when: + - ansible_selinux is defined + - ansible_selinux.status == 'enabled' + +# NOTE: This is needed to make sure we are using the correct set +# of systemd unit files. The RPMs lay down defaults but +# the install/upgrade may override them in /etc/systemd/system/. +# NOTE: We don't use the systemd module as some versions of the module +# require a service to be part of the call. 
+- name: Reload systemd units + command: systemctl daemon-reload + when: l_node_unit | changed diff --git a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml b/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml deleted file mode 100644 index 8e547351b..000000000 --- a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -- name: Update systemd units - include_tasks: ../systemd_units.yml diff --git a/roles/openshift_node/tasks/upgrade/containerized_upgrade_pull.yml b/roles/openshift_node/tasks/upgrade/containerized_upgrade_pull.yml new file mode 100644 index 000000000..71f00dcd2 --- /dev/null +++ b/roles/openshift_node/tasks/upgrade/containerized_upgrade_pull.yml @@ -0,0 +1,15 @@ +--- +- name: Pre-pull node image + command: > + docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + +- name: Pre-pull openvswitch image + command: > + docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: openshift_use_openshift_sdn | bool + +- include_tasks: ../container_images.yml diff --git a/roles/openshift_node/tasks/upgrade/rpm_upgrade.yml b/roles/openshift_node/tasks/upgrade/rpm_upgrade.yml index 120b93bc3..d2864e6b8 100644 --- a/roles/openshift_node/tasks/upgrade/rpm_upgrade.yml +++ b/roles/openshift_node/tasks/upgrade/rpm_upgrade.yml @@ -5,29 +5,20 @@ # - openshift_pkg_version # - openshift.common.is_atomic -# We verified latest rpm available is suitable, so just yum update. 
-- name: Upgrade packages - package: "name={{ openshift_service_type }}-{{ component }}{{ openshift_pkg_version }} state=present" +# Pre-pull new node rpm, but don't install +- name: download new node packages + command: "{{ ansible_pkg_mgr }} install -y --downloadonly {{ openshift_node_upgrade_rpm_list | join(' ')}}" register: result until: result | success + vars: + openshift_node_upgrade_rpm_list: + - "{{ openshift_service_type }}-node{{ openshift_pkg_version }}" + - "PyYAML" + - "dnsmasq" -- name: Ensure python-yaml present for config upgrade - package: name=PyYAML state=present - when: not openshift.common.is_atomic | bool +# Pre-pull the rpms for openvswitch, but don't install +# openvswitch requires the latest version to be installed. +- name: download openvswitch upgrade rpm + command: "{{ ansible_pkg_mgr }} update -y --downloadonly openvswitch" register: result until: result | success - -- name: Install Node service file - template: - dest: "/etc/systemd/system/{{ openshift_service_type }}-node.service" - src: "node.service.j2" - register: l_node_unit - -# NOTE: This is needed to make sure we are using the correct set -# of systemd unit files. The RPMs lay down defaults but -# the install/upgrade may override them in /etc/systemd/system/. -# NOTE: We don't use the systemd module as some versions of the module -# require a service to be part of the call. -- name: Reload systemd units - command: systemctl daemon-reload - when: l_node_unit | changed diff --git a/roles/openshift_node/tasks/upgrade/rpm_upgrade_install.yml b/roles/openshift_node/tasks/upgrade/rpm_upgrade_install.yml new file mode 100644 index 000000000..6390be558 --- /dev/null +++ b/roles/openshift_node/tasks/upgrade/rpm_upgrade_install.yml @@ -0,0 +1,19 @@ +--- +# input variables: +# - openshift_service_type +# - component +# - openshift_pkg_version +# - openshift.common.is_atomic + +# Install the pre-pulled RPM +# Note: dnsmasq is covered in its own play. 
openvswitch is included here +# because once we have the latest rpm downloaded, it will happily be installed. +- name: download new node packages + command: "{{ ansible_pkg_mgr }} install -C -y {{ openshift_node_upgrade_rpm_list | join(' ')}}" + register: result + until: result | success + vars: + openshift_node_upgrade_rpm_list: + - "{{ openshift_service_type }}-node{{ openshift_pkg_version }}" + - "PyYAML" + - "openvswitch" diff --git a/roles/openshift_node/tasks/upgrade/stop_services.yml b/roles/openshift_node/tasks/upgrade/stop_services.yml new file mode 100644 index 000000000..bbf1c5f25 --- /dev/null +++ b/roles/openshift_node/tasks/upgrade/stop_services.yml @@ -0,0 +1,43 @@ +--- +- name: Stop node and openvswitch services + service: + name: "{{ item }}" + state: stopped + with_items: + - "{{ openshift_service_type }}-node" + - openvswitch + failed_when: false + +- name: Ensure containerized services stopped before Docker restart + service: + name: "{{ item }}" + state: stopped + with_items: + - etcd_container + - openvswitch + - "{{ openshift_service_type }}-master-api" + - "{{ openshift_service_type }}-master-controllers" + - "{{ openshift_service_type }}-node" + failed_when: false + when: openshift.common.is_containerized | bool + +- service: + name: docker + state: stopped + register: l_openshift_node_upgrade_docker_stop_result + until: not l_openshift_node_upgrade_docker_stop_result | failed + retries: 3 + delay: 30 + when: + - l_docker_upgrade is defined + - l_docker_upgrade | bool + +- name: Stop rpm based services + service: + name: "{{ item }}" + state: stopped + with_items: + - "{{ openshift_service_type }}-node" + - openvswitch + failed_when: false + when: not openshift.common.is_containerized | bool diff --git a/roles/openshift_node/tasks/upgrade_pre.yml b/roles/openshift_node/tasks/upgrade_pre.yml index 5d7961a24..3346b7c65 100644 --- a/roles/openshift_node/tasks/upgrade_pre.yml +++ b/roles/openshift_node/tasks/upgrade_pre.yml @@ -7,6 +7,12 @@ - 
include_tasks: registry_auth.yml +- name: update package meta data to speed install later. + command: "{{ ansible_pkg_mgr }} makecache" + register: result + until: result | success + when: not openshift.common.is_containerized | bool + - name: Check Docker image count shell: "docker images -aq | wc -l" register: docker_image_count @@ -19,59 +25,23 @@ - l_docker_upgrade is defined - l_docker_upgrade | bool -- name: Upgrade Docker - package: name=docker{{ '-' + docker_version }} state=present +- include_tasks: upgrade/containerized_upgrade_pull.yml + when: openshift.common.is_containerized | bool + +# Prepull the rpms for docker upgrade, but don't install +- name: download docker upgrade rpm + command: "{{ ansible_pkg_mgr }} install -y --downloadonly docker{{ '-' + docker_version }}" register: result until: result | success when: - l_docker_upgrade is defined - l_docker_upgrade | bool -- name: Pre-pull node image - command: > - docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - when: openshift.common.is_containerized | bool - -- name: Pre-pull openvswitch image - command: > - docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - when: - - openshift.common.is_containerized | bool - - openshift_use_openshift_sdn | bool - - include_tasks: upgrade/rpm_upgrade.yml vars: - component: "node" openshift_version: "{{ openshift_pkg_version | default('') }}" when: not openshift.common.is_containerized | bool -- name: Remove obsolete docker-sdn-ovs.conf - file: - path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf" - state: absent - -- include_tasks: upgrade/containerized_node_upgrade.yml - when: openshift.common.is_containerized | bool - -- name: Upgrade openvswitch - package: - name: openvswitch - state: latest - when: not 
openshift.common.is_containerized | bool - register: result - until: result | success - -- name: Update oreg value - yedit: - src: "{{ openshift.common.config_base }}/node/node-config.yaml" - key: 'imageConfig.format' - value: "{{ oreg_url | default(oreg_url_node) }}" - when: oreg_url is defined or oreg_url_node is defined - # https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory - name: Check for swap usage command: grep "^[^#].*swap" /etc/fstab @@ -84,35 +54,3 @@ - name: set_fact swap_result set_fact: openshift_node_upgrade_swap_result: "{{ swap_result.stdout_lines | length > 0 | bool }}" - -# Disable Swap Block (pre) -- block: - - name: Remove swap entries from /etc/fstab - replace: - dest: /etc/fstab - regexp: '(^[^#].*swap.*)' - replace: '# \1' - backup: yes - - - name: Add notice about disabling swap - lineinfile: - dest: /etc/fstab - line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines' - state: present - when: - - openshift_node_upgrade_swap_result | default(False) | bool - - openshift_disable_swap | default(true) | bool - # End Disable Swap Block - -- name: Apply 3.6 dns config changes - yedit: - src: /etc/origin/node/node-config.yaml - key: "{{ item.key }}" - value: "{{ item.value }}" - with_items: - - key: "dnsBindAddress" - value: "127.0.0.1:53" - - key: "dnsRecursiveResolvConf" - value: "/etc/origin/node/resolv.conf" - -- include_tasks: dnsmasq_install.yml -- cgit v1.2.3