17 files changed, 447 insertions, 408 deletions
diff --git a/playbooks/common/openshift-cluster/initialize_facts.yml b/playbooks/common/openshift-cluster/initialize_facts.yml
index 04dde632b..6d83d2527 100644
--- a/playbooks/common/openshift-cluster/initialize_facts.yml
+++ b/playbooks/common/openshift-cluster/initialize_facts.yml
@@ -11,3 +11,5 @@
         hostname: "{{ openshift_hostname | default(None) }}"
   - set_fact:
       openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
+  - set_fact:
+      openshift_deployment_type: "{{ deployment_type }}"
diff --git a/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml b/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml
new file mode 100644
index 000000000..6e953be69
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml
@@ -0,0 +1,22 @@
+---
+- name: Check Docker image count  # baseline; like every task here, skipped unless docker_upgrade_nuke_images is truthy
+  shell: "docker images -aq | wc -l"
+  register: docker_image_count
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- debug: var=docker_image_count.stdout
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- name: Remove unused Docker images for Docker 1.10+ migration
+  shell: "docker rmi `docker images -aq`"
+  # Will fail on images still in use, so the task result is never treated as a failure:
+  failed_when: false
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- name: Check Docker image count  # re-count after the removal; overwrites the earlier docker_image_count
+  shell: "docker images -aq | wc -l"
+  register: docker_image_count
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- debug: var=docker_image_count.stdout
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml
index e8a20aa2b..78f6c46f3 100644
--- a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml
+++ b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml
@@ -9,6 +9,7 @@
     local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
     register: local_cert_sync_tmpdir
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Create service signer certificate
   hosts: oo_first_master
@@ -17,6 +18,7 @@
     command: mktemp -d /tmp/openshift-ansible-XXXXXXX
     register: remote_cert_create_tmpdir
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Create service signer certificate
     command: >
@@ -27,6 +29,7 @@
       --serial=service-signer.serial.txt
     args:
       chdir: "{{ remote_cert_create_tmpdir.stdout }}/"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Retrieve service signer certificate
     fetch:
@@ -38,12 +41,14 @@
     with_items:
     - "service-signer.crt"
     - "service-signer.key"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Delete remote temp directory
     file:
       name: "{{ remote_cert_create_tmpdir.stdout }}"
       state: absent
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Deploy service signer certificate
   hosts: oo_masters_to_config
@@ -55,6 +60,7 @@
     with_items:
     - "service-signer.crt"
     - "service-signer.key"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Delete local temp directory
   hosts: localhost
@@ -67,3 +73,4 @@
       name: "{{ local_cert_sync_tmpdir.stdout }}"
       state: absent
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml
index 8002af4fc..fc26d029e 100644
--- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml
+++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml
@@ -1,7 +1,7 @@
 ---
 
 # This snippet determines if a Docker upgrade is required by checking the inventory
-# variables, the available packages, and sets l_docker_version to True if so.
+# variables, the available packages, and sets l_docker_upgrade to True if so.
 
 - set_fact:
     docker_upgrade: True
diff --git a/playbooks/common/openshift-cluster/upgrades/init.yml b/playbooks/common/openshift-cluster/upgrades/init.yml
new file mode 100644
index 000000000..f3b3abe0d
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/init.yml
@@ -0,0 +1,50 @@
+---
+- include: ../verify_ansible_version.yml
+
+- hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tasks:
+  - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml
+  - add_host:  # puts every host listed in g_all_hosts into the in-memory l_oo_all_hosts group
+      name: "{{ item }}"
+      groups: l_oo_all_hosts
+    with_items: "{{ g_all_hosts | default([]) }}"  # full Jinja2 syntax; bare loop variables are deprecated
+
+- hosts: l_oo_all_hosts  # group populated by the add_host task in the preceding localhost play
+  gather_facts: no
+  tasks:
+  - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml  # same vars file, now loaded for each of these hosts
+
+- include: ../evaluate_groups.yml
+  vars:
+    # Do not allow adding hosts during upgrade: both "new host" lists are forced to be empty.
+    g_new_master_hosts: []
+    g_new_node_hosts: []
+    openshift_cluster_id: "{{ cluster_id | default('default') }}"  # 'default' when cluster_id is not set
+    openshift_deployment_type: "{{ deployment_type }}"
+
+- name: Set oo_options  # each fact below is filled from the oo_option lookup only when not already defined
+  hosts: oo_all_hosts
+  tasks:
+  - set_fact:
+      openshift_docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') }}"
+    when: openshift_docker_additional_registries is not defined
+  - set_fact:
+      openshift_docker_insecure_registries: "{{ lookup('oo_option',  'docker_insecure_registries') }}"
+    when: openshift_docker_insecure_registries is not defined
+  - set_fact:
+      openshift_docker_blocked_registries: "{{ lookup('oo_option', 'docker_blocked_registries') }}"
+    when: openshift_docker_blocked_registries is not defined
+  - set_fact:
+      openshift_docker_options: "{{ lookup('oo_option', 'docker_options') }}"
+    when: openshift_docker_options is not defined
+  - set_fact:
+      openshift_docker_log_driver: "{{ lookup('oo_option', 'docker_log_driver') }}"
+    when: openshift_docker_log_driver is not defined
+  - set_fact:
+      openshift_docker_log_options: "{{ lookup('oo_option', 'docker_log_options') }}"
+    when: openshift_docker_log_options is not defined
+
+- include: ../initialize_facts.yml
diff --git a/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml
new file mode 100644
index 000000000..4e375ac26
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml
@@ -0,0 +1,52 @@
+---
+# Builds the oo_nodes_to_upgrade group. When openshift_upgrade_nodes_label is
+# defined, only nodes matching that label selector are included; otherwise all
+# hosts in oo_nodes_to_config are used.
+- name: Filter list of nodes to be upgraded if necessary
+  hosts: oo_first_master
+  tasks:
+  - name: Retrieve list of openshift nodes matching upgrade label
+    command: >
+      {{ openshift.common.client_binary }}
+      get nodes
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --selector={{ openshift_upgrade_nodes_label }}
+      -o jsonpath='{.items[*].metadata.name}'
+    register: matching_nodes
+    changed_when: false
+    when: openshift_upgrade_nodes_label is defined
+
+  # The command output is split on single spaces to obtain the node names.
+  - set_fact:
+      nodes_to_upgrade: "{{ matching_nodes.stdout.split(' ') }}"
+    when: openshift_upgrade_nodes_label is defined
+
+  # We got a list of nodes with the label, now we need to match these with inventory hosts
+  # using their openshift.common.hostname fact.
+  - name: Map labelled nodes to inventory hosts
+    add_host:
+      name: "{{ item }}"
+      groups: temp_nodes_to_upgrade
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups['oo_nodes_to_config'] }}"
+    when: openshift_upgrade_nodes_label is defined and hostvars[item].openshift.common.hostname in nodes_to_upgrade
+    changed_when: false
+
+  # Without this guard a label that matches no inventory host would leave
+  # temp_nodes_to_upgrade undefined, and the default() below would silently
+  # select every node for upgrade.
+  - name: Fail if no inventory hosts match openshift_upgrade_nodes_label
+    fail:
+      msg: "openshift_upgrade_nodes_label was specified but no matching nodes were found in the inventory"
+    when: openshift_upgrade_nodes_label is defined and groups['temp_nodes_to_upgrade'] | default([]) | length == 0
+
+  # Build up the oo_nodes_to_upgrade group, use the list filtered by label if
+  # present, otherwise hit all nodes:
+  - name: Evaluate oo_nodes_to_upgrade
+    add_host:
+      name: "{{ item }}"
+      groups: oo_nodes_to_upgrade
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups['temp_nodes_to_upgrade'] | default(groups['oo_nodes_to_config']) }}"
diff --git a/playbooks/common/openshift-cluster/upgrades/post.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml
index e43954453..e43954453 100644
--- a/playbooks/common/openshift-cluster/upgrades/post.yml
+++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml
diff --git a/playbooks/common/openshift-cluster/upgrades/pre.yml b/playbooks/common/openshift-cluster/upgrades/pre.yml
deleted file mode 100644
index 42a24eaf8..000000000
--- a/playbooks/common/openshift-cluster/upgrades/pre.yml
+++ /dev/null
@@ -1,311 +0,0 @@
----
-###############################################################################
-# Evaluate host groups and gather facts
-###############################################################################
-
-- include: ../initialize_facts.yml
-
-- name: Update repos and initialize facts on all hosts
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config
-  roles:
-  - openshift_repos
-
-- name: Set openshift_no_proxy_internal_hostnames
-  hosts: oo_masters_to_config:oo_nodes_to_config
-  tasks:
-  - set_fact:
-      openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']
-                                                    | union(groups['oo_masters_to_config'])
-                                                    | union(groups['oo_etcd_to_config'] | default([])))
-                                                | oo_collect('openshift.common.hostname') | default([]) | join (',')
-                                                }}"
-    when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and
-            openshift_generate_no_proxy_hosts | default(True) | bool }}"
-
-- name: Evaluate additional groups for upgrade
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - name: Evaluate etcd_hosts_to_backup
-    add_host:
-      name: "{{ item }}"
-      groups: etcd_hosts_to_backup
-    with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master
-
-###############################################################################
-# Pre-upgrade checks
-###############################################################################
-- name: Verify upgrade can proceed on first master
-  hosts: oo_first_master
-  vars:
-    g_pacemaker_upgrade_url_segment: "{{ 'org/latest' if deployment_type =='origin' else '.com/enterprise/3.1' }}"
-  gather_facts: no
-  tasks:
-  - fail:
-      msg: >
-        This upgrade is only supported for atomic-enterprise, origin, openshift-enterprise, and online
-        deployment types
-    when: deployment_type not in ['atomic-enterprise', 'origin','openshift-enterprise', 'online']
-
-  - fail:
-      msg: >
-        This upgrade does not support Pacemaker:
-        https://docs.openshift.{{ g_pacemaker_upgrade_url_segment }}/install_config/upgrading/pacemaker_to_native_ha.html
-    when: openshift.master.cluster_method is defined and openshift.master.cluster_method == 'pacemaker'
-
-  # Error out in situations where the user has older versions specified in their
-  # inventory in any of the openshift_release, openshift_image_tag, and
-  # openshift_pkg_version variables. These must be removed or updated to proceed
-  # with upgrade.
-  # TODO: Should we block if you're *over* the next major release version as well?
-  - fail:
-      msg: >
-        openshift_pkg_version is {{ openshift_pkg_version }} which is not a
-        valid version for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<')
-
-  - fail:
-      msg: >
-        openshift_image_tag is {{ openshift_image_tag }} which is not a
-        valid version for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<')
-
-  - set_fact:
-      openshift_release: "{{ openshift_release[1:] }}"
-    when: openshift_release is defined and openshift_release[0] == 'v'
-
-  - fail:
-      msg: >
-        openshift_release is {{ openshift_release }} which is not a
-        valid release for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=')
-
-- include: ../../../common/openshift-cluster/initialize_openshift_version.yml
-  vars:
-    # Request specific openshift_release and let the openshift_version role handle converting this
-    # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if
-    # defined, and overriding the normal behavior of protecting the installed version
-    openshift_release: "{{ openshift_upgrade_target }}"
-    openshift_protect_installed_version: False
-    # Docker role (a dependency) should be told not to do anything to installed version
-    # of docker, we handle this separately during upgrade. (the inventory may have a
-    # docker_version defined, we don't want to actually do it until later)
-    docker_protect_installed_version: True
-
-- name: Verify master processes
-  hosts: oo_masters_to_config
-  roles:
-  - openshift_facts
-  tasks:
-  - openshift_facts:
-      role: master
-      local_facts:
-        ha: "{{ groups.oo_masters_to_config | length > 1 }}"
-
-  - name: Ensure Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-  - name: Ensure HA Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master-api"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-  - name: Ensure HA Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master-controllers"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-- name: Verify node processes
-  hosts: oo_nodes_to_config
-  roles:
-  - openshift_facts
-  - openshift_docker_facts
-  tasks:
-  - name: Ensure Node is running
-    service:
-      name: "{{ openshift.common.service_type }}-node"
-      state: started
-      enabled: yes
-    when: openshift.common.is_containerized | bool
-
-- name: Verify upgrade targets
-  hosts: oo_masters_to_config:oo_nodes_to_config
-  vars:
-    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
-  pre_tasks:
-  - fail:
-      msg: Verify OpenShift is already installed
-    when: openshift.common.version is not defined
-
-  - fail:
-      msg: Verify the correct version was found
-    when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version
-
-  - name: Clean package cache
-    command: "{{ ansible_pkg_mgr }} clean all"
-    when: not openshift.common.is_atomic | bool
-
-  - set_fact:
-      g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"
-    when: not openshift.common.is_containerized | bool
-
-  - name: Verify containers are available for upgrade
-    command: >
-      docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }}
-    register: pull_result
-    changed_when: "'Downloaded newer image' in pull_result.stdout"
-    when: openshift.common.is_containerized | bool
-
-  - name: Check latest available OpenShift RPM version
-    command: >
-      {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}"
-    failed_when: false
-    changed_when: false
-    register: avail_openshift_version
-    when: not openshift.common.is_containerized | bool
-
-  - name: Verify OpenShift RPMs are available for upgrade
-    fail:
-      msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required"
-    when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<')
-
-  - fail:
-      msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later"
-    when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<')
-
-- name: Verify docker upgrade targets
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
-  tasks:
-  # Only check if docker upgrade is required if docker_upgrade is not
-  # already set to False.
-  - include: docker/upgrade_check.yml
-    when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool
-
-  # Additional checks for Atomic hosts:
-
-  - name: Determine available Docker
-    shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker"
-    register: g_atomic_docker_version_result
-    when: openshift.common.is_atomic | bool
-
-  - set_fact:
-      l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}"
-    when: openshift.common.is_atomic | bool
-
-  - fail:
-      msg: This playbook requires access to Docker 1.10 or later
-    when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<')
-
-  - set_fact:
-      pre_upgrade_complete: True
-
-
-##############################################################################
-# Gate on pre-upgrade checks
-##############################################################################
-- name: Gate on pre-upgrade checks
-  hosts: localhost
-  connection: local
-  become: no
-  vars:
-    pre_upgrade_hosts: "{{ groups.oo_masters_to_config | union(groups.oo_nodes_to_config) }}"
-  tasks:
-  - set_fact:
-      pre_upgrade_completed: "{{ hostvars
-                                 | oo_select_keys(pre_upgrade_hosts)
-                                 | oo_collect('inventory_hostname', {'pre_upgrade_complete': true}) }}"
-  - set_fact:
-      pre_upgrade_failed: "{{ pre_upgrade_hosts | difference(pre_upgrade_completed) }}"
-  - fail:
-      msg: "Upgrade cannot continue. The following hosts did not complete pre-upgrade checks: {{ pre_upgrade_failed | join(',') }}"
-    when: pre_upgrade_failed | length > 0
-
-###############################################################################
-# Backup etcd
-###############################################################################
-- name: Backup etcd
-  hosts: etcd_hosts_to_backup
-  vars:
-    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
-    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
-  roles:
-  - openshift_facts
-  tasks:
-  # Ensure we persist the etcd role for this host in openshift_facts
-  - openshift_facts:
-      role: etcd
-      local_facts: {}
-    when: "'etcd' not in openshift"
-
-  - stat: path=/var/lib/openshift
-    register: var_lib_openshift
-
-  - stat: path=/var/lib/origin
-    register: var_lib_origin
-
-  - name: Create origin symlink if necessary
-    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
-    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
-
-  # TODO: replace shell module with command and update later checks
-  # We assume to be using the data dir for all backups.
-  - name: Check available disk space for etcd backup
-    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
-    register: avail_disk
-
-  # TODO: replace shell module with command and update later checks
-  - name: Check current embedded etcd disk usage
-    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
-    register: etcd_disk_usage
-    when: embedded_etcd | bool
-
-  - name: Abort if insufficient disk space for etcd backup
-    fail:
-      msg: >
-        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
-        {{ avail_disk.stdout }} Kb available.
-    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
-
-  - name: Install etcd (for etcdctl)
-    action: "{{ ansible_pkg_mgr }} name=etcd state=latest"
-    when: not openshift.common.is_atomic | bool
-
-  - name: Generate etcd backup
-    command: >
-      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
-      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
-
-  - set_fact:
-      etcd_backup_complete: True
-
-  - name: Display location of etcd backup
-    debug:
-      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
-
-
-##############################################################################
-# Gate on etcd backup
-##############################################################################
-- name: Gate on etcd backup
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - set_fact:
-      etcd_backup_completed: "{{ hostvars
-                                 | oo_select_keys(groups.etcd_hosts_to_backup)
-                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
-  - set_fact:
-      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
-  - fail:
-      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
-    when: etcd_backup_failed | length > 0
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml b/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml
new file mode 100644
index 000000000..8ecae4539
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml
@@ -0,0 +1,6 @@
+---
+- name: Flag pre-upgrade checks complete for hosts without errors
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
+  tasks:
+  - set_fact:
+      pre_upgrade_complete: True  # marker fact; per the play name, meant for hosts that got this far without errors
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/roles b/playbooks/common/openshift-cluster/upgrades/pre/roles
new file mode 120000
index 000000000..415645be6
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/roles
@@ -0,0 +1 @@
+../../../../../roles/
+\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml
new file mode 100644
index 000000000..06eb5f936
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml
@@ -0,0 +1,31 @@
+---
+- name: Verify master processes
+  hosts: oo_masters_to_config
+  roles:
+  - openshift_facts
+  tasks:
+  - openshift_facts:  # stores the master "ha" local fact: true when more than one master is configured
+      role: master
+      local_facts:
+        ha: "{{ groups.oo_masters_to_config | length > 1 }}"
+
+  - name: Ensure Master is running  # non-HA case; only applied on containerized hosts
+    service:
+      name: "{{ openshift.common.service_type }}-master"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool
+
+  - name: Ensure HA Master is running  # API service; HA and containerized hosts only
+    service:
+      name: "{{ openshift.common.service_type }}-master-api"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
+
+  - name: Ensure HA Master is running  # controllers service; HA and containerized hosts only
+    service:
+      name: "{{ openshift.common.service_type }}-master-controllers"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml
new file mode 100644
index 000000000..ba4d77617
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml
@@ -0,0 +1,23 @@
+---
+- name: Verify docker upgrade targets
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
+  tasks:
+  # Only check if docker upgrade is required if docker_upgrade is not already set to False.
+  # NOTE(review): "and" binds tighter than "or" below, so an undefined docker_upgrade includes the check even on Atomic hosts; confirm this is intended.
+  - include: ../docker/upgrade_check.yml
+    when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool
+
+  # Additional checks for Atomic hosts:
+
+  - name: Determine available Docker  # prints a small YAML document; avail_version is always left empty by this format
+    shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker"
+    register: g_atomic_docker_version_result
+    when: openshift.common.is_atomic | bool
+
+  - set_fact:  # parses the YAML text printed by the rpm query above
+      l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}"
+    when: openshift.common.is_atomic | bool
+
+  - fail:  # an empty avail_version falls back to curr_version through default(..., true)
+      msg: This playbook requires access to Docker 1.10 or later
+    when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<')
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
new file mode 100644
index 000000000..9a959a959
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
@@ -0,0 +1,37 @@
+---
+- name: Verify upgrade can proceed on first master
+  hosts: oo_first_master
+  gather_facts: no
+  tasks:
+  - fail:  # reject any deployment_type outside the supported list
+      msg: >
+        This upgrade is only supported for origin, openshift-enterprise, and online
+        deployment types
+    when: deployment_type not in ['origin','openshift-enterprise', 'online']
+
+  # Error out in situations where the user has older versions specified in their
+  # inventory in any of the openshift_release, openshift_image_tag, and
+  # openshift_pkg_version variables. These must be removed or updated to proceed
+  # with upgrade.
+  # TODO: Should we block if you're *over* the next major release version as well?
+  - fail:  # compares the text after the first '-' in openshift_pkg_version with the upgrade target
+      msg: >
+        openshift_pkg_version is {{ openshift_pkg_version }} which is not a
+        valid version for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<')
+
+  - fail:  # compares the text after the first 'v' in openshift_image_tag with the upgrade target
+      msg: >
+        openshift_image_tag is {{ openshift_image_tag }} which is not a
+        valid version for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<')
+
+  - set_fact:  # drop a leading 'v' so the comparison below sees a bare version
+      openshift_release: "{{ openshift_release[1:] }}"
+    when: openshift_release is defined and openshift_release[0] == 'v'
+
+  - fail:  # unlike the checks above, openshift_release must equal the upgrade target
+      msg: >
+        openshift_release is {{ openshift_release }} which is not a
+        valid release for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=')
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml
new file mode 100644
index 000000000..354af3cde
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml
@@ -0,0 +1,13 @@
+---
+- name: Verify node processes
+  hosts: oo_nodes_to_config
+  roles:
+  - openshift_facts
+  - openshift_docker_facts
+  tasks:
+  - name: Ensure Node is running  # starts and enables the node service; containerized hosts only
+    service:
+      name: "{{ openshift.common.service_type }}-node"
+      state: started
+      enabled: yes
+    when: openshift.common.is_containerized | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml
new file mode 100644
index 000000000..9632626a4
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml
@@ -0,0 +1,45 @@
+---
+- name: Verify upgrade targets
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade
+  vars:
+    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
+  pre_tasks:
+  - fail:  # abort when openshift.common.version is undefined
+      msg: Verify OpenShift is already installed
+    when: openshift.common.version is not defined
+
+  - fail:  # optional pin: only enforced when verify_upgrade_version is supplied
+      msg: Verify the correct version was found
+    when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version
+
+  - name: Clean package cache  # skipped on Atomic hosts
+    command: "{{ ansible_pkg_mgr }} clean all"
+    when: not openshift.common.is_atomic | bool
+
+  - set_fact:  # non-containerized hosts only: 'origin' for origin deployments, otherwise 'atomic-openshift'
+      g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"
+    when: not openshift.common.is_containerized | bool
+
+  - name: Verify containers are available for upgrade  # containerized hosts only
+    command: >
+      docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }}
+    register: pull_result
+    changed_when: "'Downloaded newer image' in pull_result.stdout"
+    when: openshift.common.is_containerized | bool
+
+  - name: Check latest available OpenShift RPM version  # non-containerized hosts only; never fails, the next task evaluates the result
+    command: >
+      {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}"
+    failed_when: false
+    changed_when: false
+    register: avail_openshift_version
+    when: not openshift.common.is_containerized | bool
+
+  - name: Verify OpenShift RPMs are available for upgrade  # NOTE(review): compares against openshift_release while the message cites openshift_upgrade_target; confirm they agree
+    fail:
+      msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required"
+    when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<')
+
+  - fail:  # origin deployments only: installed version must be at least openshift_upgrade_min
+      msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later"
+    when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<')
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 8a2784fb4..2c641e21e 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -1,39 +1,93 @@
 ---
 ###############################################################################
-# The restart playbook should be run after this playbook completes.
+# Upgrade Masters
 ###############################################################################
-
-# Separate step so we can execute in parallel and clear out anything unused
-# before we get into the serialized upgrade process which will then remove
-# remaining images if possible.
-- name: Cleanup unused Docker images
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+- name: Evaluate additional groups for upgrade
+  hosts: localhost
+  connection: local
+  become: no
   tasks:
-  - name: Check Docker image count
-    shell: "docker images -aq | wc -l"
-    register: docker_image_count
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - name: Evaluate etcd_hosts_to_backup  # falls back to the first master when no etcd hosts are configured
+    add_host:
+      name: "{{ item }}"
+      groups: etcd_hosts_to_backup  # in-memory group targeted by the "Backup etcd" play
+    with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}"
+
+- name: Backup etcd  # runs etcdctl backup on every host in etcd_hosts_to_backup
+  hosts: etcd_hosts_to_backup
+  vars:
+    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"  # taken from the first master's facts
+    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"  # NOTE(review): used by both the backup command and the debug message; confirm the lookup is not re-run per use
+  roles:
+  - openshift_facts
+  tasks:
+  # Ensure we persist the etcd role for this host in openshift_facts
+  - openshift_facts:
+      role: etcd
+      local_facts: {}
+    when: "'etcd' not in openshift"  # only when the openshift facts have no 'etcd' key yet
+
+  - stat: path=/var/lib/openshift
+    register: var_lib_openshift  # consulted by the symlink task below
+
+  - stat: path=/var/lib/origin
+    register: var_lib_origin  # consulted by the symlink task below
+
+  - name: Create origin symlink if necessary  # /var/lib/origin -> /var/lib/openshift/
+    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
+    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
+
+  # TODO: replace shell module with command and update later checks
+  # We assume the data dir is used for all backups.
+  - name: Check available disk space for etcd backup
+    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
+    register: avail_disk  # stdout: last line of df's "avail" column, in 1K blocks (-k)
+
+  # TODO: replace shell module with command and update later checks
+  - name: Check current embedded etcd disk usage
+    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
+    register: etcd_disk_usage  # stdout: first field of du's last line, in 1K blocks (-k)
+    when: embedded_etcd | bool
+
+  - name: Abort if insufficient disk space for etcd backup
+    fail:
+      msg: >
+        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
+        {{ avail_disk.stdout }} Kb available.
+    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)  # space is only checked when etcd is embedded
+
+  - name: Install etcd (for etcdctl)
+    action: "{{ ansible_pkg_mgr }} name=etcd state=latest"
+    when: not openshift.common.is_atomic | bool  # skipped when is_atomic is true
+
+  - name: Generate etcd backup  # writes to <data_dir>/etcd-backup-<timestamp>
+    command: >
+      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
+      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
 
-  - debug: var=docker_image_count.stdout
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - set_fact:
+      etcd_backup_complete: True  # per-host marker checked by the "Gate on etcd backup" play
 
-  - name: Remove unused Docker images for Docker 1.10+ migration
-    shell: "docker rmi `docker images -aq`"
-    # Will fail on images still in use:
-    failed_when: false
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - name: Display location of etcd backup  # prints the same path expression passed to etcdctl --backup-dir
+    debug:
+      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
 
-  - name: Check Docker image count
-    shell: "docker images -aq | wc -l"
-    register: docker_image_count
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
 
-  - debug: var=docker_image_count.stdout
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+- name: Gate on etcd backup  # fails the run if any targeted host lacks the backup marker
+  hosts: localhost
+  connection: local
+  become: no
+  tasks:
+  - set_fact:  # presumably the hostnames whose facts have etcd_backup_complete=true; verify against the oo_* filters
+      etcd_backup_completed: "{{ hostvars
+                                 | oo_select_keys(groups.etcd_hosts_to_backup)
+                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
+  - set_fact:
+      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"  # targeted hosts not in the completed list
+  - fail:
+      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
+    when: etcd_backup_failed | length > 0  # any missing host aborts the upgrade
 
-###############################################################################
-# Upgrade Masters
-###############################################################################
 - name: Upgrade master packages
   hosts: oo_masters_to_config
   handlers:
@@ -57,7 +111,6 @@
 # Create service signer cert when missing. Service signer certificate
 # is added to master config in the master config hook for v3_3.
 - include: create_service_signer_cert.yml
-  when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Upgrade master config and systemd units
   hosts: oo_masters_to_config
@@ -143,9 +196,9 @@
     origin_reconcile_bindings: "{{ deployment_type == 'origin' and openshift_version | version_compare('1.0.6', '>') }}"
     ent_reconcile_bindings: true
     openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
-    # Similar to pre.yml, we don't want to upgrade docker during the openshift_cli role,
-    # it will be updated when we perform node upgrade.
-    docker_protect_installed_version: True
+    # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe
+    # restart.
+    skip_docker_role: True
   tasks:
   - name: Verifying the correct commandline tools are available
     shell: grep {{ verify_upgrade_version }} {{ openshift.common.admin_binary}}
@@ -177,71 +230,6 @@
   - set_fact:
       reconcile_complete: True
 
-###############################################################################
-# Upgrade Nodes
-###############################################################################
-
-# Here we handle all tasks that might require a node evac. (upgrading docker, and the node service)
-- name: Perform upgrades that may require node evacuation
-  hosts: oo_masters_to_config:oo_etcd_to_config:oo_nodes_to_config
-  serial: 1
-  any_errors_fatal: true
-  roles:
-  - openshift_facts
-  handlers:
-  - include: ../../../../roles/openshift_node/handlers/main.yml
-    static: yes
-  tasks:
-  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
-  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
-  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
-  - name: Determine if node is currently scheduleable
-    command: >
-      {{ openshift.common.client_binary }} get node {{ openshift.node.nodename }} -o json
-    register: node_output
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    changed_when: false
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - set_fact:
-      was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - name: Mark unschedulable if host is a node
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=false
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - name: Evacuate Node for Kubelet upgrade
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --evacuate --force
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - include: docker/upgrade.yml
-    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
-  - include: "{{ node_config_hook }}"
-    when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_config
-
-  - include: rpm_upgrade.yml
-    vars:
-       component: "node"
-       openshift_version: "{{ openshift_pkg_version | default('') }}"
-    when: inventory_hostname in groups.oo_nodes_to_config and not openshift.common.is_containerized | bool
-
-  - include: containerized_node_upgrade.yml
-    when: inventory_hostname in groups.oo_nodes_to_config and openshift.common.is_containerized | bool
-
-  - meta: flush_handlers
-
-  - name: Set node schedulability
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=true
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config and was_schedulable | bool
-
-
 ##############################################################################
 # Gate on reconcile
 ##############################################################################
@@ -259,3 +247,13 @@
   - fail:
       msg: "Upgrade cannot continue. The following masters did not finish reconciling: {{ reconcile_failed | join(',') }}"
     when: reconcile_failed | length > 0
+
+- name: Upgrade Docker on dedicated containerized etcd hosts
+  hosts: oo_etcd_to_config:!oo_nodes_to_upgrade  # etcd hosts that are not in oo_nodes_to_upgrade
+  serial: 1  # one host at a time
+  any_errors_fatal: true
+  roles:
+  - openshift_facts
+  tasks:
+  - include: docker/upgrade.yml  # only when l_docker_upgrade is set and true, and is_atomic is false
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
new file mode 100644
index 000000000..9b572dcdf
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -0,0 +1,75 @@
+---
+- name: Evacuate and upgrade nodes  # mark unschedulable, evacuate, upgrade, then restore prior schedulability
+  hosts: oo_nodes_to_upgrade
+  # This var must be set with -e on invocation, as it is not a per-host inventory var
+  # and is evaluated early. Values such as "20%" can also be used.
+  serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"  # defaults to 1 when the var is unset
+  any_errors_fatal: true
+  roles:
+  - openshift_facts
+  - docker
+  handlers:
+  - include: ../../../../roles/openshift_node/handlers/main.yml
+    static: yes
+  pre_tasks:
+  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
+  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
+  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
+  - name: Determine if node is currently scheduleable
+    command: >
+      {{ openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json
+    register: node_output  # JSON text, parsed by the set_fact below
+    delegate_to: "{{ groups.oo_first_master.0 }}"  # client commands are run on the first master
+    changed_when: false
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - set_fact:  # true when the node's spec has no 'unschedulable' key
+      was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - name: Mark unschedulable if host is a node
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # NOTE: There is a transient "object has been modified" error here, allow a couple
+    # retries for a more reliable upgrade.
+    register: node_unsched
+    until: node_unsched.rc == 0
+    retries: 3
+    delay: 1
+
+  - name: Evacuate Node for Kubelet upgrade
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --evacuate --force
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+  tasks:
+  - include: docker/upgrade.yml  # only when l_docker_upgrade is set and true, and is_atomic is false
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
+
+  - include: "{{ node_config_hook }}"  # optional hook; included only when node_config_hook is defined
+    when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - include: rpm_upgrade.yml  # non-containerized nodes
+    vars:
+       component: "node"
+       openshift_version: "{{ openshift_pkg_version | default('') }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
+
+  - include: containerized_node_upgrade.yml  # containerized nodes
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and openshift.common.is_containerized | bool
+
+  - meta: flush_handlers  # run notified handlers before scheduling is restored
+
+  - name: Set node schedulability
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool  # only nodes that were schedulable before the upgrade
+    register: node_sched
+    until: node_sched.rc == 0
+    retries: 3
+    delay: 1
+
+