diff options
18 files changed, 261 insertions, 48 deletions
diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml b/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml new file mode 100644 index 000000000..14b0f85d4 --- /dev/null +++ b/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml @@ -0,0 +1,7 @@ +--- +# +# Node Scale Group Upgrade Playbook +# +# Upgrades scale group nodes only. +# +- include: ../../../../common/openshift-cluster/upgrades/upgrade_scale_group.yml diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml new file mode 100644 index 000000000..d9ce3a7e3 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml @@ -0,0 +1,59 @@ +--- +- name: create new scale group + hosts: localhost + tasks: + - name: build upgrade scale groups + include_role: + name: openshift_aws + tasks_from: upgrade_node_group.yml + + - fail: + msg: "Ensure that new scale groups were provisioned before proceeding to update." + when: + - "'oo_sg_new_nodes' not in groups or groups.oo_sg_new_nodes|length == 0" + +- name: initialize upgrade bits + include: init.yml + +- name: Drain and upgrade nodes + hosts: oo_sg_current_nodes + # This var must be set with -e on invocation, as it is not a per-host inventory var + # and is evaluated early. Values such as "20%" can also be used. + serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" + max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" + + pre_tasks: + - name: Load lib_openshift modules + include_role: + name: ../roles/lib_openshift + + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node + # or docker actually needs an upgrade before proceeding. Perhaps best to save this until + # we merge upgrade functionality into the base roles and a normal config.yml playbook run. 
+ - name: Mark node unschedulable + oc_adm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: False + delegate_to: "{{ groups.oo_first_master.0 }}" + retries: 10 + delay: 5 + register: node_unschedulable + until: node_unschedulable|succeeded + + - name: Drain Node for Kubelet upgrade + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets + delegate_to: "{{ groups.oo_first_master.0 }}" + register: l_upgrade_nodes_drain_result + until: not l_upgrade_nodes_drain_result | failed + retries: 60 + delay: 60 + +# Alright, let's clean up! +- name: clean up the old scale group + hosts: localhost + tasks: + - name: clean up scale group + include_role: + name: openshift_aws + tasks_from: remove_scale_group.yml diff --git a/roles/openshift_aws/defaults/main.yml b/roles/openshift_aws/defaults/main.yml index c9a429675..42ef22846 100644 --- a/roles/openshift_aws/defaults/main.yml +++ b/roles/openshift_aws/defaults/main.yml @@ -6,6 +6,9 @@ openshift_aws_create_security_groups: True openshift_aws_create_launch_config: True openshift_aws_create_scale_group: True +openshift_aws_current_version: '' +openshift_aws_new_version: '' + openshift_aws_wait_for_ssh: True openshift_aws_clusterid: default @@ -39,6 +42,7 @@ openshift_aws_ami_tags: bootstrap: "true" openshift-created: "true" clusterid: "{{ openshift_aws_clusterid }}" + parent: "{{ openshift_aws_base_ami | default('unknown') }}" openshift_aws_s3_mode: create openshift_aws_s3_bucket_name: "{{ openshift_aws_clusterid }}-docker-registry" @@ -114,7 +118,6 @@ openshift_aws_node_group_config_tags: "{{ openshift_aws_clusterid | build_instan openshift_aws_node_group_termination_policy: Default openshift_aws_node_group_replace_instances: [] openshift_aws_node_group_replace_all_instances: False 
-openshift_aws_node_group_config_extra_labels: {} openshift_aws_ami_map: master: "{{ openshift_aws_ami }}" @@ -135,8 +138,8 @@ openshift_aws_master_group_config: tags: host-type: master sub-host-type: default - labels: - type: master + runtime: docker + version: "{{ openshift_aws_new_version }}" wait_for_instances: True termination_policy: "{{ openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" @@ -159,8 +162,8 @@ openshift_aws_node_group_config: tags: host-type: node sub-host-type: compute - labels: - type: compute + runtime: docker + version: "{{ openshift_aws_new_version }}" termination_policy: "{{ openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" iam_role: "{{ openshift_aws_iam_role_name }}" @@ -179,8 +182,8 @@ openshift_aws_node_group_config: tags: host-type: node sub-host-type: infra - labels: - type: infra + runtime: docker + version: "{{ openshift_aws_new_version }}" termination_policy: "{{ openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" iam_role: "{{ openshift_aws_iam_role_name }}" @@ -262,7 +265,7 @@ openshift_aws_node_security_groups: openshift_aws_vpc_tags: Name: "{{ openshift_aws_vpc_name }}" -openshift_aws_subnet_name: us-east-1c +openshift_aws_subnet_az: us-east-1c openshift_aws_vpc: name: "{{ openshift_aws_vpc_name }}" @@ -280,6 +283,10 @@ openshift_aws_node_run_bootstrap_startup: True openshift_aws_node_user_data: '' openshift_aws_node_config_namespace: openshift-node +openshift_aws_node_groups: nodes + +openshift_aws_masters_groups: masters,etcd,nodes + # If creating extra node groups, you'll need to define all of the following # The format is the same as openshift_aws_node_group_config, but the top-level diff --git a/roles/openshift_aws/filter_plugins/openshift_aws_filters.py 
b/roles/openshift_aws/filter_plugins/openshift_aws_filters.py index a9893c0a7..e707abd3f 100644 --- a/roles/openshift_aws/filter_plugins/openshift_aws_filters.py +++ b/roles/openshift_aws/filter_plugins/openshift_aws_filters.py @@ -9,6 +9,18 @@ class FilterModule(object): ''' Custom ansible filters for use by openshift_aws role''' @staticmethod + def scale_groups_match_capacity(scale_group_info): + ''' This function will verify that the scale group instance count matches + the scale group desired capacity + + ''' + for scale_group in scale_group_info: + if scale_group['desired_capacity'] != len(scale_group['instances']): + return False + + return True + + @staticmethod def build_instance_tags(clusterid): ''' This function will return a dictionary of the instance tags. @@ -25,4 +37,5 @@ class FilterModule(object): def filters(self): ''' returns a mapping of filters to methods ''' - return {'build_instance_tags': self.build_instance_tags} + return {'build_instance_tags': self.build_instance_tags, + 'scale_groups_match_capacity': self.scale_groups_match_capacity} diff --git a/roles/openshift_aws/meta/main.yml b/roles/openshift_aws/meta/main.yml index 875efcb8f..fa0a24a33 100644 --- a/roles/openshift_aws/meta/main.yml +++ b/roles/openshift_aws/meta/main.yml @@ -1,3 +1,4 @@ --- dependencies: - lib_utils +- lib_openshift diff --git a/roles/openshift_aws/tasks/accept_nodes.yml b/roles/openshift_aws/tasks/accept_nodes.yml new file mode 100644 index 000000000..ae320962f --- /dev/null +++ b/roles/openshift_aws/tasks/accept_nodes.yml @@ -0,0 +1,34 @@ +--- +- name: fetch masters + ec2_remote_facts: + region: "{{ openshift_aws_region | default('us-east-1') }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:host-type': 'master', 'instance-state-name': 'running'} }}" + register: mastersout + retries: 20 + delay: 3 + until: "'instances' in mastersout and mastersout.instances|length > 0" + +- name: fetch new node instances 
+ ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:host-type': 'node', + 'instance-state-name': 'running'} }}" + register: instancesout + retries: 20 + delay: 3 + until: "'instances' in instancesout and instancesout.instances|length > 0" + +- debug: + msg: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}" + +- name: approve nodes + oc_adm_csr: + #approve_all: True + nodes: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}" + timeout: 60 + register: nodeout + delegate_to: "{{ mastersout.instances[0].public_ip_address }}" diff --git a/roles/openshift_aws/tasks/elb.yml b/roles/openshift_aws/tasks/elb.yml index a543222d5..5d371ec7a 100644 --- a/roles/openshift_aws/tasks/elb.yml +++ b/roles/openshift_aws/tasks/elb.yml @@ -17,6 +17,7 @@ listeners: "{{ item.value }}" scheme: "{{ openshift_aws_elb_scheme }}" tags: "{{ openshift_aws_elb_tags }}" + wait: True register: new_elb with_dict: "{{ l_elb_dict_item.value }}" diff --git a/roles/openshift_aws/tasks/launch_config_create.yml b/roles/openshift_aws/tasks/launch_config_create.yml index a688496d2..f7f0f0953 100644 --- a/roles/openshift_aws/tasks/launch_config_create.yml +++ b/roles/openshift_aws/tasks/launch_config_create.yml @@ -10,7 +10,7 @@ # Create the scale group config - name: Create the node scale group launch config ec2_lc: - name: "{{ openshift_aws_launch_config_basename }}-{{ launch_config_item.key }}-{{ l_epoch_time }}" + name: "{{ openshift_aws_launch_config_basename }}-{{ launch_config_item.key }}{{'-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}" region: "{{ openshift_aws_region }}" image_id: "{{ l_aws_ami_map[launch_config_item.key] | default(openshift_aws_ami) }}" instance_type: "{{ launch_config_item.value.instance_type }}" diff --git a/roles/openshift_aws/tasks/provision_nodes.yml 
b/roles/openshift_aws/tasks/provision_nodes.yml index 1b40f24d3..3e84666a2 100644 --- a/roles/openshift_aws/tasks/provision_nodes.yml +++ b/roles/openshift_aws/tasks/provision_nodes.yml @@ -12,7 +12,9 @@ register: instancesout retries: 20 delay: 3 - until: instancesout.instances|length > 0 + until: + - "'instances' in instancesout" + - instancesout.instances|length > 0 - name: slurp down the bootstrap.kubeconfig slurp: @@ -42,29 +44,7 @@ l_launch_config_security_groups: "{{ openshift_aws_launch_config_security_groups_extra }}" l_aws_ami_map: "{{ openshift_aws_ami_map_extra }}" - +# instances aren't scaling fast enough here, we need to wait for them - when: openshift_aws_wait_for_ssh | bool - block: - - name: pause and allow for instances to scale before we query them - pause: - seconds: 10 - - - name: fetch newly created instances - ec2_remote_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:clusterid": "{{ openshift_aws_clusterid }}" - "tag:host-type": node - instance-state-name: running - register: instancesout - retries: 20 - delay: 3 - until: instancesout.instances|length > 0 - - - name: wait for ssh to become available - wait_for: - port: 22 - host: "{{ item.public_ip_address }}" - timeout: 300 - search_regex: OpenSSH - with_items: "{{ instancesout.instances }}" + name: wait for our new nodes to come up + include: wait_for_groups.yml diff --git a/roles/openshift_aws/tasks/remove_scale_group.yml b/roles/openshift_aws/tasks/remove_scale_group.yml new file mode 100644 index 000000000..55d1af2b5 --- /dev/null +++ b/roles/openshift_aws/tasks/remove_scale_group.yml @@ -0,0 +1,27 @@ +--- +- name: fetch the scale groups + ec2_asg_facts: + region: "{{ openshift_aws_region }}" + tags: + "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'version': openshift_aws_current_version} }}" + register: qasg + +- name: remove non-master scale groups + ec2_asg: + region: "{{ openshift_aws_region }}" + state: absent + name: "{{ 
item.auto_scaling_group_name }}" + when: "'master' not in item.auto_scaling_group_name" + register: asg_results + with_items: "{{ qasg.results }}" + async: 600 + poll: 0 + +- name: join the asynch scale group removals + async_status: + jid: "{{ item.ansible_job_id }}" + register: jobs_results + with_items: "{{ asg_results.results }}" + until: jobs_results.finished + retries: 200 diff --git a/roles/openshift_aws/tasks/scale_group.yml b/roles/openshift_aws/tasks/scale_group.yml index 097859af2..30df7545d 100644 --- a/roles/openshift_aws/tasks/scale_group.yml +++ b/roles/openshift_aws/tasks/scale_group.yml @@ -1,16 +1,8 @@ --- -- name: fetch the subnet to use in scale group - ec2_vpc_subnet_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:Name": "{{ openshift_aws_subnet_name }}" - vpc-id: "{{ vpcout.vpcs[0].id }}" - register: subnetout - - name: Create the scale group ec2_asg: name: "{{ openshift_aws_scale_group_basename }} {{ item.key }}" - launch_config_name: "{{ openshift_aws_launch_config_basename }}-{{ item.key }}-{{ l_epoch_time }}" + launch_config_name: "{{ openshift_aws_launch_config_basename }}-{{ item.key }}{{ '-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}" health_check_period: "{{ item.value.health_check.period }}" health_check_type: "{{ item.value.health_check.type }}" min_size: "{{ item.value.min_size }}" diff --git a/roles/openshift_aws/tasks/setup_master_group.yml b/roles/openshift_aws/tasks/setup_master_group.yml index 166f3b938..05b68f460 100644 --- a/roles/openshift_aws/tasks/setup_master_group.yml +++ b/roles/openshift_aws/tasks/setup_master_group.yml @@ -21,7 +21,7 @@ - name: add new master to masters group add_host: - groups: "masters,etcd,nodes" + groups: "{{ openshift_aws_masters_groups }}" name: "{{ item.public_dns_name }}" hostname: "{{ openshift_aws_clusterid }}-master-{{ item.id[:-5] }}" with_items: "{{ instancesout.instances }}" diff --git a/roles/openshift_aws/tasks/setup_scale_group_facts.yml 
b/roles/openshift_aws/tasks/setup_scale_group_facts.yml new file mode 100644 index 000000000..d65fdc2de --- /dev/null +++ b/roles/openshift_aws/tasks/setup_scale_group_facts.yml @@ -0,0 +1,37 @@ +--- +- name: group scale group nodes + ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} }}" + register: qinstances + +- name: Build new node group + add_host: + groups: oo_sg_new_nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + when: + - (item.tags.version | default(False)) == openshift_aws_new_version + - "'node' in item.tags['host-type']" + with_items: "{{ qinstances.instances }}" + +- name: Build current node group + add_host: + groups: oo_sg_current_nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + when: + - (item.tags.version | default('')) == openshift_aws_current_version + - "'node' in item.tags['host-type']" + with_items: "{{ qinstances.instances }}" + +- name: place all nodes into nodes group + add_host: + groups: nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + with_items: "{{ qinstances.instances }}" diff --git a/roles/openshift_aws/tasks/upgrade_node_group.yml b/roles/openshift_aws/tasks/upgrade_node_group.yml new file mode 100644 index 000000000..d7851d887 --- /dev/null +++ b/roles/openshift_aws/tasks/upgrade_node_group.yml @@ -0,0 +1,16 @@ +--- +- fail: + msg: 'Please ensure the current_version and new_version variables are not the same.'
+ when: + - openshift_aws_current_version == openshift_aws_new_version + +- include: provision_nodes.yml + +- include: accept_nodes.yml + +- include: setup_scale_group_facts.yml + +- include: setup_master_group.yml + vars: + # we do not set etcd here as its limited to 1 or 3 + openshift_aws_masters_groups: masters,nodes diff --git a/roles/openshift_aws/tasks/vpc_and_subnet_id.yml b/roles/openshift_aws/tasks/vpc_and_subnet_id.yml index aaf9b300f..1b754f863 100644 --- a/roles/openshift_aws/tasks/vpc_and_subnet_id.yml +++ b/roles/openshift_aws/tasks/vpc_and_subnet_id.yml @@ -6,13 +6,16 @@ 'tag:Name': "{{ openshift_aws_vpc_name }}" register: vpcout -- name: debug +- name: debug vcpout debug: var=vpcout - name: fetch the default subnet id ec2_vpc_subnet_facts: region: "{{ openshift_aws_region }}" filters: - "tag:Name": "{{ openshift_aws_subnet_name }}" + "availability_zone": "{{ openshift_aws_subnet_az }}" vpc-id: "{{ vpcout.vpcs[0].id }}" register: subnetout + +- name: debug subnetout + debug: var=subnetout diff --git a/roles/openshift_aws/tasks/wait_for_groups.yml b/roles/openshift_aws/tasks/wait_for_groups.yml new file mode 100644 index 000000000..9f1a68a2a --- /dev/null +++ b/roles/openshift_aws/tasks/wait_for_groups.yml @@ -0,0 +1,31 @@ +--- +# The idea here is to wait until all scale groups are at +# their desired capacity before continuing. 
+- name: fetch the scale groups + ec2_asg_facts: + region: "{{ openshift_aws_region }}" + tags: + "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} }}" + register: qasg + until: qasg.results | scale_groups_match_capacity | bool + delay: 10 + retries: 60 + +- name: fetch newly created instances + ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:version': openshift_aws_new_version} }}" + register: instancesout + until: instancesout.instances|length > 0 + delay: 5 + retries: 60 + +- name: wait for ssh to become available + wait_for: + port: 22 + host: "{{ item.public_ip_address }}" + timeout: 300 + search_regex: OpenSSH + with_items: "{{ instancesout.instances }}" diff --git a/roles/openshift_master/tasks/bootstrap_settings.yml b/roles/openshift_master/tasks/bootstrap_settings.yml index cbd7f587b..10e0828eb 100644 --- a/roles/openshift_master/tasks/bootstrap_settings.yml +++ b/roles/openshift_master/tasks/bootstrap_settings.yml @@ -11,4 +11,5 @@ - /etc/origin/master/ca.key notify: - restart master controllers + - restart master api when: openshift_master_bootstrap_enabled | default(False) diff --git a/roles/openshift_node_group/tasks/create_config.yml b/roles/openshift_node_group/tasks/create_config.yml index 02ec30a62..8b3218e7b 100644 --- a/roles/openshift_node_group/tasks/create_config.yml +++ b/roles/openshift_node_group/tasks/create_config.yml @@ -9,6 +9,7 @@ - name: debug node config debug: var=configout + run_once: true - when: - configout.results.results.0 == {} or (configout.results.results.0 != {} and openshift_node_group_edits|length > 0) @@ -24,6 +25,7 @@ dest: "{{ mktempout.stdout }}/node-config.yaml" when: - configout.results.results.0 == {} + run_once: true - name: lay down the config from the existing configmap copy: @@ -31,6 +33,7 @@ dest: "{{ mktempout.stdout }}/node-config.yaml" when: - 
configout.results.results.0 != {} + run_once: true - name: "specialize the generated configs for {{ openshift_node_group_name }}" yedit: @@ -42,6 +45,7 @@ run_once: true - debug: var=yeditout + run_once: true - name: create node-config.yaml configmap oc_configmap: |