7 files changed, 72 insertions, 21 deletions
diff --git a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
index 0d451cf77..1e0a6d4e7 100644
--- a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
+++ b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
@@ -18,20 +18,20 @@
 # If a node fails, halt everything, the admin will need to clean up and we
 # don't want to carry on, potentially taking out every node. The playbook can safely be re-run
 # and will not take any action on a node already running the requested docker version.
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
   hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
   serial: 1
   any_errors_fatal: true

   tasks:
-  - name: Prepare for Node evacuation
+  - name: Prepare for Node draining
     command: >
       {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade

-  - name: Evacuate Node for Kubelet upgrade
+  - name: Drain Node for Kubelet upgrade
     command: >
-      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --evacuate --force
+      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates.yml b/playbooks/common/openshift-cluster/redeploy-certificates.yml
index 5f008a045..5fc81bf3a 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates.yml
@@ -204,7 +204,7 @@
       cp {{ openshift.common.config_base }}/master//admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig
     changed_when: False

-- name: Serially evacuate all nodes to trigger redeployments
+- name: Serially drain all nodes to trigger redeployments
   hosts: oo_nodes_to_config
   serial: 1
   any_errors_fatal: true
@@ -222,7 +222,7 @@
       was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool

-  - name: Prepare for node evacuation
+  - name: Prepare for node draining
     command: >
       {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
       manage-node {{ openshift.node.nodename }}
@@ -230,11 +230,11 @@
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool

-  - name: Evacuate node
+  - name: Drain node
     command: >
       {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
       manage-node {{ openshift.node.nodename }}
-      --evacuate --force
+      --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..68b111df4 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -1,5 +1,5 @@
 ---
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
   hosts: oo_nodes_to_upgrade
   # This var must be set with -e on invocation, as it is not a per-host inventory var
   # and is evaluated early. Values such as "20%" can also be used.
@@ -39,9 +39,9 @@
       retries: 3
       delay: 1

-  - name: Evacuate Node for Kubelet upgrade
+  - name: Drain Node for Kubelet upgrade
     command: >
-      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md
index d44438332..a88470bdd 100644
--- a/roles/openshift_certificate_expiry/README.md
+++ b/roles/openshift_certificate_expiry/README.md
@@ -9,7 +9,7 @@ include:
 * Master/Node Service Certificates
 * Router/Registry Service Certificates from etcd secrets
 * Master/Node/Router/Registry/Admin `kubeconfig`s
-* Etcd certificates
+* Etcd certificates (including embedded)

 This role pairs well with the redeploy certificates playbook:
@@ -111,12 +111,16 @@
 There are two top-level keys in the saved JSON results, `data` and
 `summary`.

 The `data` key is a hash where the keys are the names of each host
-examined and the values are the check results for each respective
-host.
+examined and the values are the check results for the certificates
+identified on each respective host.

-The `summary` key is a hash that summarizes the number of certificates
-expiring within the configured warning window and the number of
-already expired certificates.
+The `summary` key is a hash that summarizes the total number of
+certificates:
+
+* examined on the entire cluster
+* OK
+* expiring within the configured warning window
+* already expired

 The example below is abbreviated to save space:
@@ -193,7 +197,9 @@ The example below is abbreviated to save space:
     },
     "summary": {
         "warning": 6,
-        "expired": 0
+        "expired": 0,
+        "total": 7,
+        "ok": 1
     }
 }
 ```
diff --git a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
index bedd23fe8..5f102e960 100644
--- a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
@@ -51,9 +51,13 @@ Example playbook usage:

     total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts])
     total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts])
+    total_ok = sum([hostvars[h]['check_results']['summary']['ok'] for h in play_hosts])
+    total_total = sum([hostvars[h]['check_results']['summary']['total'] for h in play_hosts])

     json_result['summary']['warning'] = total_warnings
     json_result['summary']['expired'] = total_expired
+    json_result['summary']['ok'] = total_ok
+    json_result['summary']['total'] = total_total

     return json_result
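
To make the filter plugin's new roll-up concrete: each host's check_results.summary is summed field by field across play_hosts. The per-host figures in the sketch below are invented purely for illustration; only their sums, which match the abbreviated README example, come from this commit.

    # Hypothetical per-host summaries (invented values):
    master1.example.com:
      check_results:
        summary: {warning: 4, expired: 0, ok: 1, total: 5}
    node1.example.com:
      check_results:
        summary: {warning: 2, expired: 0, ok: 0, total: 2}

    # Field-by-field sums over play_hosts become the combined report summary:
    summary: {warning: 6, expired: 0, ok: 1, total: 7}
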
diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
index e838eb2d4..1fac284f2 100644
--- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
@@ -467,7 +467,11 @@ an OpenShift Container Platform cluster

     ######################################################################
     # Check etcd certs
+    #
+    # Two things to check: 'external' etcd, and embedded etcd.
     ######################################################################
+    # FIRST: The 'external' etcd
+    #
     # Some values may be duplicated, make this a set for now so we
     # unique them all
     etcd_certs_to_check = set([])
@@ -506,6 +510,43 @@ an OpenShift Container Platform cluster
             classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)

     ######################################################################
+    # Now the embedded etcd
+    ######################################################################
+    try:
+        with open('/etc/origin/master/master-config.yaml', 'r') as fp:
+            cfg = yaml.load(fp)
+    except IOError:
+        # Not present
+        pass
+    else:
+        if cfg.get('etcdConfig', {}).get('servingInfo', {}).get('certFile', None) is not None:
+            # This is embedded
+            etcd_crt_name = cfg['etcdConfig']['servingInfo']['certFile']
+        else:
+            # Not embedded
+            etcd_crt_name = None
+
+        if etcd_crt_name is not None:
+            # etcd_crt_name is relative to the location of the
+            # master-config.yaml file
+            cfg_path = os.path.dirname(fp.name)
+            etcd_cert = os.path.join(cfg_path, etcd_crt_name)
+            with open(etcd_cert, 'r') as etcd_fp:
+                (cert_subject,
+                 cert_expiry_date,
+                 time_remaining) = load_and_handle_cert(etcd_fp.read(), now)
+
+                expire_check_result = {
+                    'cert_cn': cert_subject,
+                    'path': etcd_fp.name,
+                    'expiry': cert_expiry_date,
+                    'days_remaining': time_remaining.days,
+                    'health': None,
+                }
+
+                classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
+
+    ######################################################################
     # /Check etcd certs
     ######################################################################

@@ -523,7 +564,7 @@ an OpenShift Container Platform cluster
     ######################################################################
     # First the router certs
     try:
-        router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(),
+        router_secrets_raw = subprocess.Popen('oc get -n default secret router-certs -o yaml'.split(),
                                               stdout=subprocess.PIPE)
         router_ds = yaml.load(router_secrets_raw.communicate()[0])
         router_c = router_ds['data']['tls.crt']
@@ -552,7 +593,7 @@ an OpenShift Container Platform cluster
     ######################################################################
     # Now for registry
     try:
-        registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(),
+        registry_secrets_raw = subprocess.Popen('oc get -n default secret registry-certificates -o yaml'.split(),
                                                 stdout=subprocess.PIPE)
         registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
         registry_c = registry_ds['data']['registry.crt']
diff --git a/roles/openshift_node/README.md b/roles/openshift_node/README.md
index d1920c485..616f44c1d 100644
--- a/roles/openshift_node/README.md
+++ b/roles/openshift_node/README.md
@@ -43,7 +43,7 @@ Currently we support re-labeling nodes but we don't re-schedule running pods nor

 ```
 oadm manage-node --schedulable=false ${NODE}
-oadm manage-node --evacuate ${NODE}
+oadm manage-node --drain ${NODE}
 oadm manage-node --schedulable=true ${NODE}
 ````
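
A note on the embedded-etcd branch added to openshift_cert_expiry.py above: it only runs when /etc/origin/master/master-config.yaml exists and defines etcdConfig.servingInfo.certFile, and that certFile path is resolved relative to the directory holding master-config.yaml. A master running embedded etcd typically carries a stanza along the lines of the sketch below; the field values are illustrative and not taken from this commit. On clusters using external etcd the etcdConfig key is absent, so etcd_crt_name stays None and the extra check is skipped.

    # Illustrative embedded-etcd stanza in /etc/origin/master/master-config.yaml
    etcdConfig:
      address: 127.0.0.1:4001
      peerAddress: 127.0.0.1:7001
      servingInfo:
        bindAddress: 0.0.0.0:4001
        certFile: etcd.server.crt      # relative, resolved against /etc/origin/master/
        clientCA: ca.crt
        keyFile: etcd.server.key
      storageDirectory: /var/lib/origin/openshift.local.etcd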