11 files changed, 101 insertions, 35 deletions
diff --git a/library/modify_yaml.py b/library/modify_yaml.py
index 000c7d9a2..0805f01ec 100755
--- a/library/modify_yaml.py
+++ b/library/modify_yaml.py
@@ -25,15 +25,22 @@ EXAMPLES = '''
 def set_key(yaml_data, yaml_key, yaml_value):
     changes = []
     ptr = yaml_data
+    final_key = yaml_key.split('.')[-1]
     for key in yaml_key.split('.'):
-        if key not in ptr and key != yaml_key.split('.')[-1]:
+        # Key isn't present and we're not on the final key. Set to empty dictionary.
+        if key not in ptr and key != final_key:
             ptr[key] = {}
             ptr = ptr[key]
-        elif key == yaml_key.split('.')[-1]:
+        # Current key is the final key. Update value.
+        elif key == final_key:
             if (key in ptr and module.safe_eval(ptr[key]) != yaml_value) or (key not in ptr):  # noqa: F405
                 ptr[key] = yaml_value
                 changes.append((yaml_key, yaml_value))
         else:
+            # Next value is None and we're not on the final key.
+            # Turn value into an empty dictionary.
+            if ptr[key] is None and key != final_key:
+                ptr[key] = {}
             ptr = ptr[key]
     return changes
@@ -71,21 +78,18 @@ def main():
     yaml.add_representer(type(None), none_representer)

     try:
-
-        yaml_file = open(dest)
-        yaml_data = yaml.safe_load(yaml_file.read())
-        yaml_file.close()
+        with open(dest) as yaml_file:
+            yaml_data = yaml.safe_load(yaml_file.read())

         changes = set_key(yaml_data, yaml_key, yaml_value)

         if len(changes) > 0:
             if backup:
                 module.backup_local(dest)
-            yaml_file = open(dest, 'w')
-            yaml_string = yaml.dump(yaml_data, default_flow_style=False)
-            yaml_string = yaml_string.replace('\'\'', '""')
-            yaml_file.write(yaml_string)
-            yaml_file.close()
+            with open(dest, 'w') as yaml_file:
+                yaml_string = yaml.dump(yaml_data, default_flow_style=False)
+                yaml_string = yaml_string.replace('\'\'', '""')
+                yaml_file.write(yaml_string)

         return module.exit_json(changed=(len(changes) > 0), changes=changes)
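The hoisted `final_key` and the new `None` guard in the `else` branch are easy to sanity-check outside Ansible. Below is a minimal standalone sketch of the same traversal, with `module.safe_eval` replaced by a plain comparison (the `module` object only exists inside a running Ansible module); the dotted key and values are illustrative, not from the PR:

```python
# Standalone sketch of set_key(); module.safe_eval swapped for a plain
# comparison since `module` only exists inside a running Ansible module.
def set_key(yaml_data, yaml_key, yaml_value):
    changes = []
    ptr = yaml_data
    final_key = yaml_key.split('.')[-1]
    for key in yaml_key.split('.'):
        if key not in ptr and key != final_key:
            ptr[key] = {}          # missing intermediate key: create a dict
            ptr = ptr[key]
        elif key == final_key:
            if key not in ptr or ptr[key] != yaml_value:
                ptr[key] = yaml_value
                changes.append((yaml_key, yaml_value))
        else:
            if ptr[key] is None and key != final_key:
                ptr[key] = {}      # the fix: a None placeholder no longer breaks traversal
            ptr = ptr[key]
    return changes

# Before the fix, a None value raised a TypeError on the next iteration;
# now it is replaced with an empty dict and traversal continues.
data = {'kubernetesMasterConfig': None}
print(set_key(data, 'kubernetesMasterConfig.apiLevels', ['v1']))
# -> [('kubernetesMasterConfig.apiLevels', ['v1'])]
print(data)
# -> {'kubernetesMasterConfig': {'apiLevels': ['v1']}}
```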
diff --git a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
index 0d451cf77..1e0a6d4e7 100644
--- a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
+++ b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
@@ -18,20 +18,20 @@
 # If a node fails, halt everything, the admin will need to clean up and we
 # don't want to carry on, potentially taking out every node. The playbook can safely be re-run
 # and will not take any action on a node already running the requested docker version.
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
   hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
   serial: 1
   any_errors_fatal: true
   tasks:
-  - name: Prepare for Node evacuation
+  - name: Prepare for Node draining
     command: >
       {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"
    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade

-  - name: Evacuate Node for Kubelet upgrade
+  - name: Drain Node for Kubelet upgrade
     command: >
-      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --evacuate --force
+      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade.yml
index 496b00697..d6115e7a5 100644
--- a/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade.yml
+++ b/playbooks/byo/openshift-cluster/upgrades/v3_4/upgrade.yml
@@ -89,6 +89,8 @@
 - include: ../../../../common/openshift-cluster/upgrades/cleanup_unused_images.yml

 - include: ../../../../common/openshift-cluster/upgrades/upgrade_control_plane.yml
+  vars:
+    master_config_hook: "v3_4/master_config_upgrade.yml"

 - include: ../../../../common/openshift-cluster/upgrades/upgrade_nodes.yml
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates.yml b/playbooks/common/openshift-cluster/redeploy-certificates.yml
index 5f008a045..5fc81bf3a 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates.yml
@@ -204,7 +204,7 @@
       cp {{ openshift.common.config_base }}/master//admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig
     changed_when: False

-- name: Serially evacuate all nodes to trigger redeployments
+- name: Serially drain all nodes to trigger redeployments
   hosts: oo_nodes_to_config
   serial: 1
   any_errors_fatal: true
@@ -222,7 +222,7 @@
         was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool

-  - name: Prepare for node evacuation
+  - name: Prepare for node draining
     command: >
       {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
       manage-node {{ openshift.node.nodename }}
@@ -230,11 +230,11 @@
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool

-  - name: Evacuate node
+  - name: Drain node
     command: >
       {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
       manage-node {{ openshift.node.nodename }}
-      --evacuate --force
+      --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool
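The redeploy-certificates play above derives `was_schedulable` by parsing `oc get node -o json` before deciding whether to drain. A hedged sketch of doing the same check by hand, where the node name is a placeholder and a reachable cluster is assumed:

```python
import json
import subprocess

# Mirrors the playbook expression:
#   'unschedulable' not in (node_output.stdout | from_json).spec
node_name = 'node1.example.com'  # placeholder hostname
node_output = subprocess.check_output(
    ['oc', 'get', 'node', node_name, '-o', 'json'])
spec = json.loads(node_output)['spec']

# Schedulable nodes simply omit spec.unschedulable from their JSON.
was_schedulable = 'unschedulable' not in spec
print(was_schedulable)
```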
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
index 5ff9521ec..0f8d94737 100644
--- a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml
@@ -75,7 +75,7 @@
   hosts: etcd_hosts_to_upgrade
   serial: 1
   vars:
-    upgrade_version: 3.0.14
+    upgrade_version: 3.0.15
   tasks:
   - include: containerized_tasks.yml
     when: etcd_container_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 474e6311e..6950b6166 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -51,6 +51,14 @@

 - include: create_service_signer_cert.yml

+# Set openshift_master_facts separately. In order to reconcile
+# admission_config's, we currently must run openshift_master_facts and
+# then run openshift_facts.
+- name: Set OpenShift master facts
+  hosts: oo_masters_to_config
+  roles:
+  - openshift_master_facts
+
 - name: Upgrade master config and systemd units
   hosts: oo_masters_to_config
   handlers:
@@ -58,8 +66,9 @@
     static: yes
   roles:
   - openshift_facts
-  - openshift_master_facts
-  tasks:
+  post_tasks:
+  - include_vars: ../../../../roles/openshift_master_facts/vars/main.yml
+
   - include: upgrade_scheduler.yml

   - include: "{{ master_config_hook }}"
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..68b111df4 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -1,5 +1,5 @@
 ---
-- name: Evacuate and upgrade nodes
+- name: Drain and upgrade nodes
   hosts: oo_nodes_to_upgrade
   # This var must be set with -e on invocation, as it is not a per-host inventory var
   # and is evaluated early. Values such as "20%" can also be used.
@@ -39,9 +39,9 @@
     retries: 3
     delay: 1

-  - name: Evacuate Node for Kubelet upgrade
+  - name: Drain Node for Kubelet upgrade
     command: >
-      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --drain --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade
diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md
index d44438332..a88470bdd 100644
--- a/roles/openshift_certificate_expiry/README.md
+++ b/roles/openshift_certificate_expiry/README.md
@@ -9,7 +9,7 @@ include:
 * Master/Node Service Certificates
 * Router/Registry Service Certificates from etcd secrets
 * Master/Node/Router/Registry/Admin `kubeconfig`s
-* Etcd certificates
+* Etcd certificates (including embedded)

 This role pairs well with the redeploy certificates playbook:
@@ -111,12 +111,16 @@
 There are two top-level keys in the saved JSON results, `data` and
 `summary`.

 The `data` key is a hash where the keys are the names of each host
-examined and the values are the check results for each respective
-host.
+examined and the values are the check results for the certificates
+identified on each respective host.

-The `summary` key is a hash that summarizes the number of certificates
-expiring within the configured warning window and the number of
-already expired certificates.
+The `summary` key is a hash that summarizes the total number of
+certificates:
+
+* examined on the entire cluster
+* OK
+* expiring within the configured warning window
+* already expired

 The example below is abbreviated to save space:
@@ -193,7 +197,9 @@
     },
     "summary": {
         "warning": 6,
-        "expired": 0
+        "expired": 0,
+        "total": 7,
+        "ok": 1
     }
 }
 ```
diff --git a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
index bedd23fe8..5f102e960 100644
--- a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py
@@ -51,9 +51,13 @@ Example playbook usage:
     total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts])
     total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts])
+    total_ok = sum([hostvars[h]['check_results']['summary']['ok'] for h in play_hosts])
+    total_total = sum([hostvars[h]['check_results']['summary']['total'] for h in play_hosts])

     json_result['summary']['warning'] = total_warnings
     json_result['summary']['expired'] = total_expired
+    json_result['summary']['ok'] = total_ok
+    json_result['summary']['total'] = total_total

     return json_result
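The filter change mirrors the two new per-host `summary` fields (`ok` and `total`) into the cluster-wide rollup described in the README. A self-contained sketch of that aggregation, with made-up per-host numbers standing in for `hostvars`/`play_hosts`:

```python
# Simplified stand-ins for Ansible's hostvars and play_hosts.
hostvars = {
    'master1': {'check_results': {'summary': {'warning': 6, 'expired': 0, 'ok': 1, 'total': 7}}},
    'node1':   {'check_results': {'summary': {'warning': 2, 'expired': 1, 'ok': 4, 'total': 7}}},
}
play_hosts = list(hostvars)

# Sum each per-host summary field into a single cluster-wide summary,
# exactly as the filter plugin does for warning/expired/ok/total.
json_result = {'summary': {}}
for field in ('warning', 'expired', 'ok', 'total'):
    json_result['summary'][field] = sum(
        hostvars[h]['check_results']['summary'][field] for h in play_hosts)

print(json_result)
# -> {'summary': {'warning': 8, 'expired': 1, 'ok': 5, 'total': 14}}
```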
diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
index e838eb2d4..1fac284f2 100644
--- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
+++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
@@ -467,7 +467,11 @@ an OpenShift Container Platform cluster
     ######################################################################
     # Check etcd certs
+    #
+    # Two things to check: 'external' etcd, and embedded etcd.
     ######################################################################
+    # FIRST: The 'external' etcd
+    #
     # Some values may be duplicated, make this a set for now so we
     # unique them all
     etcd_certs_to_check = set([])
@@ -506,6 +510,43 @@ an OpenShift Container Platform cluster
             classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)

     ######################################################################
+    # Now the embedded etcd
+    ######################################################################
+    try:
+        with open('/etc/origin/master/master-config.yaml', 'r') as fp:
+            cfg = yaml.load(fp)
+    except IOError:
+        # Not present
+        pass
+    else:
+        if cfg.get('etcdConfig', {}).get('servingInfo', {}).get('certFile', None) is not None:
+            # This is embedded
+            etcd_crt_name = cfg['etcdConfig']['servingInfo']['certFile']
+        else:
+            # Not embedded
+            etcd_crt_name = None
+
+        if etcd_crt_name is not None:
+            # etcd_crt_name is relative to the location of the
+            # master-config.yaml file
+            cfg_path = os.path.dirname(fp.name)
+            etcd_cert = os.path.join(cfg_path, etcd_crt_name)
+            with open(etcd_cert, 'r') as etcd_fp:
+                (cert_subject,
+                 cert_expiry_date,
+                 time_remaining) = load_and_handle_cert(etcd_fp.read(), now)
+
+                expire_check_result = {
+                    'cert_cn': cert_subject,
+                    'path': etcd_fp.name,
+                    'expiry': cert_expiry_date,
+                    'days_remaining': time_remaining.days,
+                    'health': None,
+                }
+
+                classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
+
+    ######################################################################
     # /Check etcd certs
     ######################################################################
@@ -523,7 +564,7 @@ an OpenShift Container Platform cluster
     ######################################################################
     # First the router certs
     try:
-        router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(),
+        router_secrets_raw = subprocess.Popen('oc get -n default secret router-certs -o yaml'.split(),
                                               stdout=subprocess.PIPE)
         router_ds = yaml.load(router_secrets_raw.communicate()[0])
         router_c = router_ds['data']['tls.crt']
@@ -552,7 +593,7 @@ an OpenShift Container Platform cluster
     ######################################################################
     # Now for registry
     try:
-        registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(),
+        registry_secrets_raw = subprocess.Popen('oc get -n default secret registry-certificates -o yaml'.split(),
                                                 stdout=subprocess.PIPE)
         registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
         registry_c = registry_ds['data']['registry.crt']
diff --git a/roles/openshift_node/README.md b/roles/openshift_node/README.md
index d1920c485..616f44c1d 100644
--- a/roles/openshift_node/README.md
+++ b/roles/openshift_node/README.md
@@ -43,7 +43,7 @@ Currently we support re-labeling nodes but we don't re-schedule running pods nor

 ```
 oadm manage-node --schedulable=false ${NODE}
-oadm manage-node --evacuate ${NODE}
+oadm manage-node --drain ${NODE}
 oadm manage-node --schedulable=true ${NODE}
 ```
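For completeness, the manual sequence in this README can also be scripted around whatever maintenance the drain is protecting; a hedged sketch, where `NODE` is a placeholder hostname and the middle step is whatever work needs the node empty:

```python
import subprocess

# Scripted version of the README's manual flow: cordon, drain, do the
# maintenance, then mark the node schedulable again.
NODE = 'node1.example.com'  # placeholder hostname

subprocess.check_call(['oadm', 'manage-node', '--schedulable=false', NODE])
subprocess.check_call(['oadm', 'manage-node', '--drain', NODE])
# ... relabel or otherwise service the node here ...
subprocess.check_call(['oadm', 'manage-node', '--schedulable=true', NODE])
```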