diff options
Diffstat (limited to 'roles/openshift_node_upgrade')
15 files changed, 524 insertions, 0 deletions
diff --git a/roles/openshift_node_upgrade/README.md b/roles/openshift_node_upgrade/README.md new file mode 100644 index 000000000..e21bee412 --- /dev/null +++ b/roles/openshift_node_upgrade/README.md @@ -0,0 +1,108 @@ +OpenShift/Atomic Enterprise Node upgrade +========= + +Role responsible for a single node upgrade. +It is expected a node is functioning and a part of an OpenShift cluster. + +Requirements +------------ + +TODO + +Role Variables +-------------- +From this role: + +| Name | Default value | | +|--------------------------------|-----------------------|--------------------------------------------------------| +| deployment_type | | Inventory var | +| docker_upgrade_nuke_images | | Optional inventory var | +| docker_version | | Optional inventory var | +| l_docker_upgrade | | | +| node_config_hook | | | +| openshift.docker.gte_1_10 | | | +| openshift_image_tag | | Set by openshift_version role | +| openshift_pkg_version | | Set by openshift_version role | +| openshift_release | | Set by openshift_version role | +| skip_docker_restart | | | +| openshift_cloudprovider_kind | | | + +From openshift.common: + +| Name | Default Value | | +|------------------------------------|---------------------|---------------------| +| openshift.common.config_base |---------------------|---------------------| +| openshift.common.data_dir |---------------------|---------------------| +| openshift.common.hostname |---------------------|---------------------| +| openshift.common.http_proxy |---------------------|---------------------| +| openshift.common.is_atomic |---------------------|---------------------| +| openshift.common.is_containerized |---------------------|---------------------| +| openshift.common.portal_net |---------------------|---------------------| +| openshift.common.service_type |---------------------|---------------------| +| openshift.common.use_openshift_sdn |---------------------|---------------------| + +From openshift.master: + +| Name | Default Value | | +|------------------------------------|---------------------|---------------------| +| openshift.master.api_port |---------------------|---------------------| + +From openshift.node: + +| Name | Default Value | | +|------------------------------------|---------------------|---------------------| +| openshift.node.debug_level |---------------------|---------------------| +| openshift.node.node_image |---------------------|---------------------| +| openshift.node.ovs_image |---------------------|---------------------| + + +Dependencies +------------ +openshift_common + +TODO + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + +``` +--- +- name: Upgrade nodes + hosts: oo_nodes_to_upgrade + serial: 1 + any_errors_fatal: true + + pre_tasks: + - name: Mark unschedulable + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false + delegate_to: "{{ groups.oo_first_master.0 }}" + + - name: Drain Node for Kubelet upgrade + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data + delegate_to: "{{ groups.oo_first_master.0 }}" + + roles: + - openshift_facts + - docker + - openshift_node_upgrade + + post_tasks: + - name: Set node schedulability + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true + delegate_to: "{{ groups.oo_first_master.0 }}" +``` + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +TODO diff --git a/roles/openshift_node_upgrade/files/nuke_images.sh b/roles/openshift_node_upgrade/files/nuke_images.sh new file mode 100644 index 000000000..8635eab0d --- /dev/null +++ b/roles/openshift_node_upgrade/files/nuke_images.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Stop any running containers +running_container_ids=`docker ps -q` +if test -n "$running_container_ids" +then + docker stop $running_container_ids +fi + +# Delete all containers +container_ids=`docker ps -a -q` +if test -n "$container_ids" +then + docker rm -f -v $container_ids +fi + +# Delete all images (forcefully) +image_ids=`docker images -aq` +if test -n "$image_ids" +then + # Some layers are deleted recursively and are no longer present + # when docker goes to remove them: + docker rmi -f `docker images -aq` || true +fi + diff --git a/roles/openshift_node_upgrade/handlers/main.yml b/roles/openshift_node_upgrade/handlers/main.yml new file mode 100644 index 000000000..cb51416d4 --- /dev/null +++ b/roles/openshift_node_upgrade/handlers/main.yml @@ -0,0 +1,14 @@ +--- +- name: restart openvswitch + systemd: name=openvswitch state=restarted + when: (not skip_node_svc_handlers | default(False) | bool) and not (ovs_service_status_changed | default(false) | bool) and openshift.common.use_openshift_sdn | bool + notify: + - restart openvswitch pause + +- name: restart openvswitch pause + pause: seconds=15 + when: (not skip_node_svc_handlers | default(False) | bool) and openshift.common.is_containerized | bool + +- name: restart node + systemd: name={{ openshift.common.service_type }}-node state=restarted + when: (not skip_node_svc_handlers | default(False) | bool) and not (node_service_status_changed | default(false) | bool) diff --git a/roles/openshift_node_upgrade/meta/main.yml b/roles/openshift_node_upgrade/meta/main.yml new file mode 100644 index 000000000..cd2f362aa --- /dev/null +++ b/roles/openshift_node_upgrade/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: your name + description: OpenShift Node upgrade + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.1 + platforms: + - name: EL + versions: + - 7 +dependencies: +- role: openshift_common diff --git a/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml b/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml new file mode 100644 index 000000000..07b0ac715 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml @@ -0,0 +1,14 @@ +--- +# This is a hack to allow us to use systemd_units.yml, but skip the handlers which +# restart services. We will unconditionally restart all containerized services +# because we have to unconditionally restart Docker: +- set_fact: + skip_node_svc_handlers: True + +- name: Update systemd units + include: systemd_units.yml + +# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of +# play when the node has already been marked schedulable again. (this would look strange +# in logs otherwise) +- meta: flush_handlers diff --git a/roles/openshift_node_upgrade/tasks/docker/restart.yml b/roles/openshift_node_upgrade/tasks/docker/restart.yml new file mode 100644 index 000000000..176fc3c0b --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/docker/restart.yml @@ -0,0 +1,33 @@ +--- +# input variables: +# - openshift.common.service_type +# - openshift.common.is_containerized +# - openshift.common.hostname +# - openshift.master.api_port + +- name: Restart docker + service: name=docker state=restarted + +- name: Update docker facts + openshift_facts: + role: docker + +- name: Restart containerized services + service: name={{ item }} state=started + with_items: + - etcd_container + - openvswitch + - "{{ openshift.common.service_type }}-master" + - "{{ openshift.common.service_type }}-master-api" + - "{{ openshift.common.service_type }}-master-controllers" + - "{{ openshift.common.service_type }}-node" + failed_when: false + when: openshift.common.is_containerized | bool + +- name: Wait for master API to come back online + wait_for: + host: "{{ openshift.common.hostname }}" + state: started + delay: 10 + port: "{{ openshift.master.api_port }}" + when: inventory_hostname in groups.oo_masters_to_config diff --git a/roles/openshift_node_upgrade/tasks/docker/upgrade.yml b/roles/openshift_node_upgrade/tasks/docker/upgrade.yml new file mode 100644 index 000000000..e91891ca9 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/docker/upgrade.yml @@ -0,0 +1,49 @@ +--- +# input variables: +# - openshift.common.service_type +# - openshift.common.is_containerized +# - docker_upgrade_nuke_images +# - docker_version +# - skip_docker_restart + +# We need docker service up to remove all the images, but these services will keep +# trying to re-start and thus re-pull the images we're trying to delete. +- name: Stop containerized services + service: name={{ item }} state=stopped + with_items: + - "{{ openshift.common.service_type }}-master" + - "{{ openshift.common.service_type }}-master-api" + - "{{ openshift.common.service_type }}-master-controllers" + - "{{ openshift.common.service_type }}-node" + - etcd_container + - openvswitch + failed_when: false + when: openshift.common.is_containerized | bool + +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + +- debug: var=docker_image_count.stdout + +# TODO(jchaloup): put all docker_upgrade_nuke_images into a block with only one condition +- name: Remove all containers and images + script: nuke_images.sh + register: nuke_images_result + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- service: name=docker state=stopped + +- name: Upgrade Docker + package: name=docker{{ '-' + docker_version }} state=present + +- include: restart.yml + when: not skip_docker_restart | default(False) | bool diff --git a/roles/openshift_node_upgrade/tasks/main.yml b/roles/openshift_node_upgrade/tasks/main.yml new file mode 100644 index 000000000..b1d5f0e0f --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/main.yml @@ -0,0 +1,77 @@ +--- +# input variables: +# - l_docker_upgrade +# - openshift.common.is_atomic +# - node_config_hook +# - openshift_pkg_version +# - openshift.common.is_containerized +# - deployment_type +# - openshift_release + +# tasks file for openshift_node_upgrade +- include: docker/upgrade.yml + vars: + # We will restart Docker ourselves after everything is ready: + skip_docker_restart: True + when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool + +- include: "{{ node_config_hook }}" + when: node_config_hook is defined + +- include: rpm_upgrade.yml + vars: + component: "node" + openshift_version: "{{ openshift_pkg_version | default('') }}" + when: not openshift.common.is_containerized | bool + +- name: Remove obsolete docker-sdn-ovs.conf + file: path=/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf state=absent + when: (deployment_type == 'openshift-enterprise' and openshift_release | version_compare('3.4', '>=')) or (deployment_type == 'origin' and openshift_release | version_compare('1.4', '>=')) + +- include: containerized_node_upgrade.yml + when: openshift.common.is_containerized | bool + +- name: Ensure containerized services stopped before Docker restart + service: name={{ item }} state=stopped + with_items: + - etcd_container + - openvswitch + - "{{ openshift.common.service_type }}-master" + - "{{ openshift.common.service_type }}-master-api" + - "{{ openshift.common.service_type }}-master-controllers" + - "{{ openshift.common.service_type }}-node" + failed_when: false + when: openshift.common.is_containerized | bool + +- name: Upgrade openvswitch + package: + name: openvswitch + state: latest + register: ovs_pkg + when: not openshift.common.is_containerized | bool + +- name: Restart openvswitch + systemd: + name: openvswitch + state: restarted + when: + - not openshift.common.is_containerized | bool + - ovs_pkg | changed + +# Mandatory Docker restart, ensure all containerized services are running: +- include: docker/restart.yml + +- name: Restart rpm node service + service: name="{{ openshift.common.service_type }}-node" state=restarted + when: not openshift.common.is_containerized | bool + +- name: Wait for node to be ready + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.common.hostname | lower }} --no-headers + register: node_output + delegate_to: "{{ groups.oo_first_master.0 }}" + until: "{{ node_output.stdout.split()[1].startswith('Ready')}}" + # Give the node two minutes to come back online. Note that we pre-pull images now + # so containerized services should restart quickly as well. + retries: 24 + delay: 5 diff --git a/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml new file mode 100644 index 000000000..480e87d58 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml @@ -0,0 +1,14 @@ +--- +# input variables: +# - openshift.common.service_type +# - component +# - openshift_pkg_version +# - openshift.common.is_atomic + +# We verified latest rpm available is suitable, so just yum update. +- name: Upgrade packages + package: "name={{ openshift.common.service_type }}-{{ component }}{{ openshift_pkg_version }} state=present" + +- name: Ensure python-yaml present for config upgrade + package: name=PyYAML state=present + when: not openshift.common.is_atomic | bool diff --git a/roles/openshift_node_upgrade/tasks/systemd_units.yml b/roles/openshift_node_upgrade/tasks/systemd_units.yml new file mode 100644 index 000000000..862cd19c4 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/systemd_units.yml @@ -0,0 +1,119 @@ +--- +# input variables +# - openshift.node.node_image +# - openshift_image_tag +# - openshift.common.is_containerized +# - openshift.node.ovs_image +# - openshift.common.use_openshift_sdn +# - openshift.common.service_type +# - openshift.node.debug_level +# - openshift.common.config_base +# - openshift.common.http_proxy +# - openshift.common.portal_net +# - openshift.common +# - openshift.common.http_proxy +# notify: +# - restart openvswitch +# - restart node + +# This file is included both in the openshift_master role and in the upgrade +# playbooks. + +- name: Pre-pull node image + command: > + docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: openshift.common.is_containerized | bool + +- name: Pre-pull openvswitch image + command: > + docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | bool + +- name: Install Node dependencies docker service file + template: + dest: "/etc/systemd/system/{{ openshift.common.service_type }}-node-dep.service" + src: openshift.docker.node.dep.service + register: install_node_dep_result + when: openshift.common.is_containerized | bool + +- name: Install Node docker service file + template: + dest: "/etc/systemd/system/{{ openshift.common.service_type }}-node.service" + src: openshift.docker.node.service + register: install_node_result + when: openshift.common.is_containerized | bool + +- name: Create the openvswitch service env file + template: + src: openvswitch.sysconfig.j2 + dest: /etc/sysconfig/openvswitch + when: openshift.common.is_containerized | bool + register: install_ovs_sysconfig + notify: + - restart openvswitch + +# May be a temporary workaround. +# https://bugzilla.redhat.com/show_bug.cgi?id=1331590 +- name: Create OpenvSwitch service.d directory + file: path=/etc/systemd/system/openvswitch.service.d/ state=directory + when: openshift.common.use_openshift_sdn | default(true) | bool + +- name: Install OpenvSwitch service OOM fix + template: + dest: "/etc/systemd/system/openvswitch.service.d/01-avoid-oom.conf" + src: openvswitch-avoid-oom.conf + when: openshift.common.use_openshift_sdn | default(true) | bool + register: install_oom_fix_result + notify: + - restart openvswitch + +- name: Install OpenvSwitch docker service file + template: + dest: "/etc/systemd/system/openvswitch.service" + src: openvswitch.docker.service + when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | default(true) | bool + notify: + - restart openvswitch + +- name: Configure Node settings + lineinfile: + dest: /etc/sysconfig/{{ openshift.common.service_type }}-node + regexp: "{{ item.regex }}" + line: "{{ item.line }}" + create: true + with_items: + - regex: '^OPTIONS=' + line: "OPTIONS=--loglevel={{ openshift.node.debug_level | default(2) }}" + - regex: '^CONFIG_FILE=' + line: "CONFIG_FILE={{ openshift.common.config_base }}/node/node-config.yaml" + - regex: '^IMAGE_VERSION=' + line: "IMAGE_VERSION={{ openshift_image_tag }}" + notify: + - restart node + +- name: Configure Proxy Settings + lineinfile: + dest: /etc/sysconfig/{{ openshift.common.service_type }}-node + regexp: "{{ item.regex }}" + line: "{{ item.line }}" + create: true + with_items: + - regex: '^HTTP_PROXY=' + line: "HTTP_PROXY={{ openshift.common.http_proxy | default('') }}" + - regex: '^HTTPS_PROXY=' + line: "HTTPS_PROXY={{ openshift.common.https_proxy | default('') }}" + - regex: '^NO_PROXY=' + line: "NO_PROXY={{ openshift.common.no_proxy | default([]) }},{{ openshift.common.portal_net }},{{ hostvars[groups.oo_first_master.0].openshift.master.sdn_cluster_network_cidr }}" + when: ('http_proxy' in openshift.common and openshift.common.http_proxy != '') + notify: + - restart node + +- name: Reload systemd units + command: systemctl daemon-reload + when: (openshift.common.is_containerized | bool and (install_node_result | changed or install_ovs_sysconfig | changed or install_node_dep_result | changed)) or install_oom_fix_result | changed + notify: + - restart node diff --git a/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service b/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service new file mode 100644 index 000000000..0fb34cffd --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service @@ -0,0 +1,11 @@ +[Unit] +Requires=docker.service +After=docker.service +PartOf={{ openshift.common.service_type }}-node.service +Before={{ openshift.common.service_type }}-node.service + + +[Service] +ExecStart=/bin/bash -c "if [[ -f /usr/bin/docker-current ]]; then echo \"DOCKER_ADDTL_BIND_MOUNTS=--volume=/usr/bin/docker-current:/usr/bin/docker-current:ro --volume=/etc/sysconfig/docker:/etc/sysconfig/docker:ro\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; else echo \"#DOCKER_ADDTL_BIND_MOUNTS=\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; fi" +ExecStop= +SyslogIdentifier={{ openshift.common.service_type }}-node-dep diff --git a/roles/openshift_node_upgrade/templates/openshift.docker.node.service b/roles/openshift_node_upgrade/templates/openshift.docker.node.service new file mode 100644 index 000000000..e33d5d497 --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openshift.docker.node.service @@ -0,0 +1,26 @@ +[Unit] +After={{ openshift.common.service_type }}-master.service +After=docker.service +After=openvswitch.service +PartOf=docker.service +Requires=docker.service +{% if openshift.common.use_openshift_sdn %} +Requires=openvswitch.service +{% endif %} +Wants={{ openshift.common.service_type }}-master.service +Requires={{ openshift.common.service_type }}-node-dep.service +After={{ openshift.common.service_type }}-node-dep.service + +[Service] +EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node +EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node-dep +ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-node +ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-node -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }}{{ ':rslave' if openshift.docker.gte_1_10 | default(False) | bool else '' }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node {% if openshift_cloudprovider_kind | default('') != '' -%} -v {{ openshift.common.config_base }}/cloudprovider:{{ openshift.common.config_base}}/cloudprovider {% endif -%} -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:rw -v /sys/fs/cgroup:/sys/fs/cgroup:rw -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log -v /dev:/dev $DOCKER_ADDTL_BIND_MOUNTS {{ openshift.node.node_image }}:${IMAGE_VERSION} +ExecStartPost=/usr/bin/sleep 10 +ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-node +SyslogIdentifier={{ openshift.common.service_type }}-node +Restart=always +RestartSec=5s + +[Install] +WantedBy=docker.service diff --git a/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf b/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf new file mode 100644 index 000000000..3229bc56b --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf @@ -0,0 +1,3 @@ +# Avoid the OOM killer for openvswitch and it's children: +[Service] +OOMScoreAdjust=-1000 diff --git a/roles/openshift_node_upgrade/templates/openvswitch.docker.service b/roles/openshift_node_upgrade/templates/openvswitch.docker.service new file mode 100644 index 000000000..1e1f8967d --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch.docker.service @@ -0,0 +1,17 @@ +[Unit] +After=docker.service +Requires=docker.service +PartOf=docker.service + +[Service] +EnvironmentFile=/etc/sysconfig/openvswitch +ExecStartPre=-/usr/bin/docker rm -f openvswitch +ExecStart=/usr/bin/docker run --name openvswitch --rm --privileged --net=host --pid=host -v /lib/modules:/lib/modules -v /run:/run -v /sys:/sys:ro -v /etc/origin/openvswitch:/etc/openvswitch {{ openshift.node.ovs_image }}:${IMAGE_VERSION} +ExecStartPost=/usr/bin/sleep 5 +ExecStop=/usr/bin/docker stop openvswitch +SyslogIdentifier=openvswitch +Restart=always +RestartSec=5s + +[Install] +WantedBy=docker.service diff --git a/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 b/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 new file mode 100644 index 000000000..da7c3742a --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 @@ -0,0 +1 @@ +IMAGE_VERSION={{ openshift_image_tag }} |