diff options
36 files changed, 1213 insertions, 178 deletions
diff --git a/callback_plugins/default.py b/callback_plugins/default.py index bc0b207bb..c64145b5c 100644 --- a/callback_plugins/default.py +++ b/callback_plugins/default.py @@ -45,6 +45,9 @@ class CallbackModule(DEFAULT_MODULE.CallbackModule):  # pylint: disable=too-few-      CALLBACK_TYPE = 'stdout'      CALLBACK_NAME = 'default' +    def __init__(self, *args, **kwargs): +        BASECLASS.__init__(self, *args, **kwargs) +      def _dump_results(self, result):          '''Return the text to output for a result.'''          result['_ansible_verbose_always'] = True diff --git a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml index 9be6becc1..834461e14 100644 --- a/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml +++ b/playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml @@ -25,13 +25,13 @@    tasks:    - name: Prepare for Node evacuation      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=false +      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=false      delegate_to: "{{ groups.oo_first_master.0 }}"      when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade    - name: Evacuate Node for Kubelet upgrade      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --evacuate --force +      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --evacuate --force      delegate_to: "{{ groups.oo_first_master.0 }}"      when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade @@ -40,7 +40,7 @@    - name: Set node schedulability      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=true +      {{ 
openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename }} --schedulable=true      delegate_to: "{{ groups.oo_first_master.0 }}"      when: openshift.node.schedulable | bool      when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade and openshift.node.schedulable | bool diff --git a/playbooks/common/openshift-cluster/redeploy-certificates.yml b/playbooks/common/openshift-cluster/redeploy-certificates.yml index 4996c56a7..5f008a045 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates.yml @@ -224,7 +224,7 @@    - name: Prepare for node evacuation      command: > -      {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig +      {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig        manage-node {{ openshift.node.nodename }}        --schedulable=false      delegate_to: "{{ groups.oo_first_master.0 }}" @@ -232,7 +232,7 @@    - name: Evacuate node      command: > -      {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig +      {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig        manage-node {{ openshift.node.nodename }}        --evacuate --force      delegate_to: "{{ groups.oo_first_master.0 }}" @@ -240,7 +240,7 @@    - name: Set node schedulability      command: > -      {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig +      {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig        manage-node {{ openshift.node.nodename }} --schedulable=true      delegate_to: "{{ groups.oo_first_master.0 }}"     
 when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool diff --git a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml index 78f6c46f3..460d572da 100644 --- a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml +++ b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml @@ -22,7 +22,7 @@    - name: Create service signer certificate      command: > -      {{ openshift.common.admin_binary }} ca create-signer-cert +      {{ openshift.common.client_binary }} adm ca create-signer-cert        --cert=service-signer.crt        --key=service-signer.key        --name=openshift-service-serving-signer diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 947f865d3..b3f4d7d1a 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -200,19 +200,15 @@      # restart.      
skip_docker_role: True    tasks: -  - name: Verifying the correct commandline tools are available -    shell: grep {{ verify_upgrade_version }} {{ openshift.common.admin_binary}} -    when: openshift.common.is_containerized | bool and verify_upgrade_version is defined -    - name: Reconcile Cluster Roles      command: > -      {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig +      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig        policy reconcile-cluster-roles --additive-only=true --confirm      run_once: true    - name: Reconcile Cluster Role Bindings      command: > -      {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig +      {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig        policy reconcile-cluster-role-bindings        --exclude-groups=system:authenticated        --exclude-groups=system:authenticated:oauth @@ -224,7 +220,7 @@    - name: Reconcile Security Context Constraints      command: > -      {{ openshift.common.admin_binary}} policy reconcile-sccs --confirm --additive-only=true +      {{ openshift.common.client_binary }} adm policy reconcile-sccs --confirm --additive-only=true      run_once: true    - set_fact: diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 9b572dcdf..1f314c854 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -29,7 +29,7 @@    - name: Mark unschedulable if host is a node      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false +      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} 
--schedulable=false      delegate_to: "{{ groups.oo_first_master.0 }}"      when: inventory_hostname in groups.oo_nodes_to_upgrade      # NOTE: There is a transient "object has been modified" error here, allow a couple @@ -41,7 +41,7 @@    - name: Evacuate Node for Kubelet upgrade      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --evacuate --force +      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force      delegate_to: "{{ groups.oo_first_master.0 }}"      when: inventory_hostname in groups.oo_nodes_to_upgrade    tasks: @@ -64,7 +64,7 @@    - name: Set node schedulability      command: > -      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true +      {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true      delegate_to: "{{ groups.oo_first_master.0 }}"      when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool      register: node_sched diff --git a/playbooks/common/openshift-master/restart.yml b/playbooks/common/openshift-master/restart.yml index 57a63cfee..5769ef5cd 100644 --- a/playbooks/common/openshift-master/restart.yml +++ b/playbooks/common/openshift-master/restart.yml @@ -66,63 +66,8 @@        current_host: "{{ exists.stat.exists }}"      when: openshift.common.rolling_restart_mode == 'system' -- name: Determine which masters are currently active -  hosts: oo_masters_to_config -  any_errors_fatal: true -  tasks: -  - name: Check master service status -    command: > -      systemctl is-active {{ openshift.common.service_type }}-master -    register: active_check_output -    when: openshift.master.cluster_method | default(None) == 'pacemaker' -    failed_when: false -    changed_when: false -  - set_fact: -      is_active: "{{ active_check_output.stdout == 'active' }}" -    when: 
openshift.master.cluster_method | default(None) == 'pacemaker' - -- name: Evaluate master groups -  hosts: localhost -  become: no -  tasks: -  - fail: -      msg: > -        Did not receive active status from any masters. Please verify pacemaker cluster. -    when: "{{ hostvars[groups.oo_first_master.0].openshift.master.cluster_method | default(None) == 'pacemaker' and 'True' not in (hostvars -              | oo_select_keys(groups['oo_masters_to_config']) -              | oo_collect('is_active') -              | list) }}" -  - name: Evaluate oo_active_masters -    add_host: -      name: "{{ item }}" -      groups: oo_active_masters -      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" -      ansible_become: "{{ g_sudo | default(omit) }}" -    with_items: "{{ groups.oo_masters_to_config | default([]) }}" -    when: (hostvars[item]['is_active'] | default(false)) | bool -  - name: Evaluate oo_current_masters -    add_host: -      name: "{{ item }}" -      groups: oo_current_masters -      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" -      ansible_become: "{{ g_sudo | default(omit) }}" -    with_items: "{{ groups.oo_masters_to_config | default([]) }}" -    when: (hostvars[item]['current_host'] | default(false)) | bool - -- name: Validate pacemaker cluster -  hosts: oo_active_masters -  tasks: -  - name: Retrieve pcs status -    command: pcs status -    register: pcs_status_output -    changed_when: false -  - fail: -      msg: > -        Pacemaker cluster validation failed. One or more nodes are not online. 
-    when: not (pcs_status_output.stdout | validate_pcs_cluster(groups.oo_masters_to_config)) | bool -  - name: Restart masters -  hosts: oo_masters_to_config:!oo_active_masters:!oo_current_masters +  hosts: oo_masters_to_config    vars:      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"    serial: 1 @@ -132,20 +77,3 @@    - include: restart_services.yml      when: openshift.common.rolling_restart_mode == 'services' -- name: Restart active masters -  hosts: oo_active_masters -  serial: 1 -  tasks: -  - include: restart_hosts_pacemaker.yml -    when: openshift.common.rolling_restart_mode == 'system' -  - include: restart_services_pacemaker.yml -    when: openshift.common.rolling_restart_mode == 'services' - -- name: Restart current masters -  hosts: oo_current_masters -  serial: 1 -  tasks: -  - include: restart_hosts.yml -    when: openshift.common.rolling_restart_mode == 'system' -  - include: restart_services.yml -    when: openshift.common.rolling_restart_mode == 'services' diff --git a/playbooks/common/openshift-master/restart_hosts.yml b/playbooks/common/openshift-master/restart_hosts.yml index ff206f5a2..b1c36718c 100644 --- a/playbooks/common/openshift-master/restart_hosts.yml +++ b/playbooks/common/openshift-master/restart_hosts.yml @@ -5,8 +5,8 @@    poll: 0    ignore_errors: true    become: yes -# When cluster_method != pacemaker we can ensure the api_port is -# available. + +# Ensure the api_port is available.  
- name: Wait for master API to come back online    become: no    local_action: @@ -15,25 +15,3 @@        state=started        delay=10        port="{{ openshift.master.api_port }}" -  when: openshift.master.cluster_method != 'pacemaker' -- name: Wait for master to start -  become: no -  local_action: -    module: wait_for -      host="{{ inventory_hostname }}" -      state=started -      delay=10 -      port=22 -  when: openshift.master.cluster_method == 'pacemaker' -- name: Wait for master to become available -  command: pcs status -  register: pcs_status_output -  until: pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname]) | bool -  retries: 15 -  delay: 2 -  changed_when: false -  when: openshift.master.cluster_method == 'pacemaker' -- fail: -    msg: > -      Pacemaker cluster validation failed {{ inventory hostname }} is not online. -  when: openshift.master.cluster_method == 'pacemaker' and not (pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname])) | bool diff --git a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml deleted file mode 100644 index c9219e8de..000000000 --- a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml +++ /dev/null @@ -1,25 +0,0 @@ -- name: Fail over master resource -  command: > -    pcs resource move master {{ hostvars | oo_select_keys(groups['oo_masters_to_config']) | oo_collect('openshift.common.hostname', {'is_active': 'False'}) | list | first }} -- name: Wait for master API to come back online -  become: no -  local_action: -    module: wait_for -      host="{{ openshift.master.cluster_hostname }}" -      state=started -      delay=10 -      port="{{ openshift.master.api_port }}" -- name: Restart master system -  # https://github.com/ansible/ansible/issues/10616 -  shell: sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart" -  async: 1 -  poll: 0 -  ignore_errors: true -  become: yes -- name: 
Wait for master to start -  become: no -  local_action: -   module: wait_for -      host="{{ inventory_hostname }}" -      state=started -      delay=10 diff --git a/playbooks/common/openshift-master/restart_services_pacemaker.yml b/playbooks/common/openshift-master/restart_services_pacemaker.yml deleted file mode 100644 index e738f3fb6..000000000 --- a/playbooks/common/openshift-master/restart_services_pacemaker.yml +++ /dev/null @@ -1,10 +0,0 @@ -- name: Restart master services -  command: pcs resource restart master -- name: Wait for master API to come back online -  become: no -  local_action: -    module: wait_for -      host="{{ openshift.master.cluster_hostname }}" -      state=started -      delay=10 -      port="{{ openshift.master.api_port }}" diff --git a/roles/etcd_common/library/delegated_serial_command.py b/roles/etcd_common/library/delegated_serial_command.py index 3969edfdd..84d4f97c2 100755 --- a/roles/etcd_common/library/delegated_serial_command.py +++ b/roles/etcd_common/library/delegated_serial_command.py @@ -270,6 +270,5 @@ def main():  # import module snippets  from ansible.module_utils.basic import * -from ansible.module_utils.splitter import *  main() diff --git a/roles/nuage_master/tasks/serviceaccount.yml b/roles/nuage_master/tasks/serviceaccount.yml index 5b4af5824..2b3ae0454 100644 --- a/roles/nuage_master/tasks/serviceaccount.yml +++ b/roles/nuage_master/tasks/serviceaccount.yml @@ -16,7 +16,7 @@    shell: >      echo {{ nuage_service_account_config | to_json | quote }} |      {{ openshift.common.client_binary }} create -    -n default  +    -n default      --config={{nuage_tmp_conf}}      -f -    register: osnuage_create_service_account @@ -25,7 +25,7 @@  - name: Configure role/user permissions    command: > -    {{ openshift.common.admin_binary }} {{item}} +    {{ openshift.common.client_binary }} adm {{item}}      --config={{nuage_tmp_conf}}    with_items: "{{nuage_tasks}}"    register: osnuage_perm_task @@ -34,7 +34,7 @@  - name: 
Generate the node client config    command: > -    {{ openshift.common.admin_binary }} create-api-client-config +    {{ openshift.common.client_binary }} adm create-api-client-config        --certificate-authority={{ openshift_master_ca_cert }}        --client-dir={{ cert_output_dir }}        --master={{ openshift.master.api_url }} diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml index bb89b65a6..b6d403067 100644 --- a/roles/openshift_ca/tasks/main.yml +++ b/roles/openshift_ca/tasks/main.yml @@ -80,7 +80,7 @@  - name: Create the master certificates if they do not already exist    command: > -    {{ openshift.common.admin_binary }} create-master-certs +    {{ openshift.common.client_binary }} adm create-master-certs      {% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %}      --certificate-authority {{ named_ca_certificate }}      {% endfor %} diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md new file mode 100644 index 000000000..d44438332 --- /dev/null +++ b/roles/openshift_certificate_expiry/README.md @@ -0,0 +1,250 @@ +OpenShift Certificate Expiration Checker +======================================== + +OpenShift certificate expiration checking. Be warned of certificates +expiring within a configurable window of days, and notified of +certificates which have already expired. Certificates examined +include: + +* Master/Node Service Certificates +* Router/Registry Service Certificates from etcd secrets +* Master/Node/Router/Registry/Admin `kubeconfig`s +* Etcd certificates + +This role pairs well with the redeploy certificates playbook: + +* [Redeploying Certificates Documentation](https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html) + +Just like the redeploying certificates playbook, this role is intended +to be used with an inventory that is representative of the +cluster. 
For best results run `ansible-playbook` with the `-v` option. + + + +Role Variables +-------------- + +Core variables in this role: + +| Name                                                  | Default value                  | Description                                                           | +|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------| +| `openshift_certificate_expiry_config_base`            | `/etc/origin`                  | Base openshift config directory                                       | +| `openshift_certificate_expiry_warning_days`           | `30`                           | Flag certificates which will expire in this many days from now        | +| `openshift_certificate_expiry_show_all`               | `no`                           | Include healthy (non-expired and non-warning) certificates in results | + +Optional report/result saving variables in this role: + +| Name                                                  | Default value                  | Description                                                           | +|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------| +| `openshift_certificate_expiry_generate_html_report`   | `no`                           | Generate an HTML report of the expiry check results                   | +| `openshift_certificate_expiry_html_report_path`       | `/tmp/cert-expiry-report.html` | The full path to save the HTML report as                              | +| `openshift_certificate_expiry_save_json_results`      | `no`                           | Save expiry check results as a json file                              | +| `openshift_certificate_expiry_json_results_path`      | `/tmp/cert-expiry-report.json` | The full path to save the json report as                              | + 
+ +Example Playbook +---------------- + +Default behavior: + +```yaml +--- +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  roles: +    - role: openshift_certificate_expiry +``` + +Generate HTML and JSON artifacts in their default paths: + +```yaml +--- +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_generate_html_report: yes +    openshift_certificate_expiry_save_json_results: yes +  roles: +    - role: openshift_certificate_expiry +``` + +Change the expiration warning window to 1500 days (good for testing +the module out): + +```yaml +--- +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_warning_days: 1500 +  roles: +    - role: openshift_certificate_expiry +``` + +Change the expiration warning window to 1500 days (good for testing +the module out) and save the results as a JSON file: + +```yaml +--- +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_warning_days: 1500 +    openshift_certificate_expiry_save_json_results: yes +  roles: +    - role: openshift_certificate_expiry +``` + + +JSON Output +----------- + +There are two top-level keys in the saved JSON results, `data` and +`summary`. + +The `data` key is a hash where the keys are the names of each host +examined and the values are the check results for each respective +host. + +The `summary` key is a hash that summarizes the number of certificates +expiring within the configured warning window and the number of +already expired certificates. 
+ +The example below is abbreviated to save space: + +```json +{ +    "data": { +        "192.168.124.148": { +            "etcd": [ +                { +                    "cert_cn": "CN:etcd-signer@1474563722", +                    "days_remaining": 350, +                    "expiry": "2017-09-22 17:02:25", +                    "health": "warning", +                    "path": "/etc/etcd/ca.crt" +                }, +            ], +            "kubeconfigs": [ +                { +                    "cert_cn": "O:system:nodes, CN:system:node:m01.example.com", +                    "days_remaining": 715, +                    "expiry": "2018-09-22 17:08:57", +                    "health": "warning", +                    "path": "/etc/origin/node/system:node:m01.example.com.kubeconfig" +                }, +                { +                    "cert_cn": "O:system:cluster-admins, CN:system:admin", +                    "days_remaining": 715, +                    "expiry": "2018-09-22 17:04:40", +                    "health": "warning", +                    "path": "/etc/origin/master/admin.kubeconfig" +                } +            ], +            "meta": { +                "checked_at_time": "2016-10-07 15:26:47.608192", +                "show_all": "True", +                "warn_before_date": "2020-11-15 15:26:47.608192", +                "warning_days": 1500 +            }, +            "ocp_certs": [ +                { +                    "cert_cn": "CN:172.30.0.1, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:m01.example.com, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:172.30.0.1, DNS:192.168.124.148, IP Address:172.30.0.1, IP Address:192.168.124.148", +                    "days_remaining": 715, +                    "expiry": "2018-09-22 17:04:39", +                    "health": "warning", +                    "path": 
"/etc/origin/master/master.server.crt" +                }, +                { +                    "cert_cn": "CN:openshift-signer@1474563878", +                    "days_remaining": 1810, +                    "expiry": "2021-09-21 17:04:38", +                    "health": "ok", +                    "path": "/etc/origin/node/ca.crt" +                } +            ], +            "registry": [ +                { +                    "cert_cn": "CN:172.30.101.81, DNS:docker-registry-default.router.default.svc.cluster.local, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.101.81, IP Address:172.30.101.81", +                    "days_remaining": 728, +                    "expiry": "2018-10-05 18:54:29", +                    "health": "warning", +                    "path": "/api/v1/namespaces/default/secrets/registry-certificates" +                } +            ], +            "router": [ +                { +                    "cert_cn": "CN:router.default.svc, DNS:router.default.svc, DNS:router.default.svc.cluster.local", +                    "days_remaining": 715, +                    "expiry": "2018-09-22 17:48:23", +                    "health": "warning", +                    "path": "/api/v1/namespaces/default/secrets/router-certs" +                } +            ] +        } +    }, +    "summary": { +        "warning": 6, +        "expired": 0 +    } +} +``` + +The `summary` from the json data can be easily checked for +warnings/expirations using a variety of command-line tools. + +For exampe, using `grep` we can look for the word `summary` and print +out the 2 lines **after** the match (`-A2`): + +``` +$ grep -A2 summary /tmp/cert-expiry-report.json +    "summary": { +        "warning": 16, +        "expired": 0 +``` + +If available, the [jq](https://stedolan.github.io/jq/) tool can also +be used to pick out specific values. Example 1 and 2 below show how to +select just one value, either `warning` or `expired`. 
Example 3 shows +how to select both values at once: + +``` +$ jq '.summary.warning' /tmp/cert-expiry-report.json +16 +$ jq '.summary.expired' /tmp/cert-expiry-report.json +0 +$ jq '.summary.warning,.summary.expired' /tmp/cert-expiry-report.json +16 +0 +``` + + +Requirements +------------ + +* None + + +Dependencies +------------ + +* None + + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +Tim Bielawa (tbielawa@redhat.com) diff --git a/roles/openshift_certificate_expiry/defaults/main.yml b/roles/openshift_certificate_expiry/defaults/main.yml new file mode 100644 index 000000000..6d7b19298 --- /dev/null +++ b/roles/openshift_certificate_expiry/defaults/main.yml @@ -0,0 +1,8 @@ +--- +openshift_certificate_expiry_config_base: "/etc/origin" +openshift_certificate_expiry_warning_days: 30 +openshift_certificate_expiry_show_all: no +openshift_certificate_expiry_generate_html_report: no +openshift_certificate_expiry_html_report_path: "/tmp/cert-expiry-report.html" +openshift_certificate_expiry_save_json_results: no +openshift_certificate_expiry_json_results_path: "/tmp/cert-expiry-report.json" diff --git a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py new file mode 100644 index 000000000..2e2430ee6 --- /dev/null +++ b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py @@ -0,0 +1,88 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# vim: expandtab:tabstop=4:shiftwidth=4 +""" +Custom filters for use in openshift-ansible +""" + +from ansible import errors +from collections import Mapping +from distutils.util import strtobool +from distutils.version import LooseVersion +from operator import itemgetter +import OpenSSL.crypto +import os +import pdb +import pkg_resources +import re +import json +import yaml +from ansible.parsing.yaml.dumper import AnsibleDumper +from urlparse import urlparse + +try: +    # ansible-2.2 +    # 
ansible.utils.unicode.to_unicode is deprecated in ansible-2.2, +    # ansible.module_utils._text.to_text should be used instead. +    from ansible.module_utils._text import to_text +except ImportError: +    # ansible-2.1 +    from ansible.utils.unicode import to_unicode as to_text + +# Disabling too-many-public-methods, since filter methods are necessarily +# public +# pylint: disable=too-many-public-methods +class FilterModule(object): +    """ Custom ansible filters """ + +    @staticmethod +    def oo_cert_expiry_results_to_json(hostvars, play_hosts): +        """Takes results (`hostvars`) from the openshift_cert_expiry role +check and serializes them into proper machine-readable JSON +output. This filter parameter **MUST** be the playbook `hostvars` +variable. The `play_hosts` parameter is so we know what to loop over +when we're extrating the values. + +Returns: + +Results are collected into two top-level keys under the `json_results` +dict: + +* `json_results.data` [dict] - Each individual host check result, keys are hostnames +* `json_results.summary` [dict] - Summary of number of `warning` and `expired` +certificates + +Example playbook usage: + +  - name: Generate expiration results JSON +    become: no +    run_once: yes +    delegate_to: localhost +    when: "{{ openshift_certificate_expiry_save_json_results|bool }}" +    copy: +      content: "{{ hostvars|oo_cert_expiry_results_to_json() }}" +      dest: "{{ openshift_certificate_expiry_json_results_path }}" + +        """ +        json_result = { +            'data': {}, +            'summary': {}, +        } + +        for host in play_hosts: +            json_result['data'][host] = hostvars[host]['check_results']['check_results'] + +        total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts]) +        total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts]) + +        json_result['summary']['warning'] = total_warnings +    
    json_result['summary']['expired'] = total_expired + +        return json_result + + +    def filters(self): +        """ returns a mapping of filters to methods """ +        return { +            "oo_cert_expiry_results_to_json": self.oo_cert_expiry_results_to_json, +        } diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py new file mode 100644 index 000000000..2cdb87dc1 --- /dev/null +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -0,0 +1,637 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# pylint: disable=line-too-long,invalid-name + +"""For details on this module see DOCUMENTATION (below)""" + +# router/registry cert grabbing +import subprocess +# etcd config file +import ConfigParser +# Expiration parsing +import datetime +# File path stuff +import os +# Config file parsing +import yaml +# Certificate loading +import OpenSSL.crypto + +DOCUMENTATION = ''' +--- +module: openshift_cert_expiry +short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster +description: +  - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired. +  - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following: +  - C(ok) - not expired, and outside of the expiration C(warning_days) window. +  - C(warning) - not expired, but will expire between now and the C(warning_days) window. +  - C(expired) - an expired certificate. +  - Certificate flagging follow this logic: +  - If the expiration date is before now then the certificate is classified as C(expired). 
+  - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning). +  - All other conditions are classified as C(ok). +  - The following keys are ALSO present in the certificate summary: +  - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted) +  - C(days_remaining) - The number of days until the certificate expires. +  - C(expiry) - The date the certificate expires on. +  - C(path) - The full path to the certificate on the examined host. +version_added: "1.0" +options: +  config_base: +    description: +      - Base path to OCP system settings. +    required: false +    default: /etc/origin +  warning_days: +    description: +      - Flag certificates which will expire in C(warning_days) days from now. +    required: false +    default: 30 +  show_all: +    description: +      - Enable this option to show analysis of ALL certificates examined by this module. +      - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported. +    required: false +    default: false + +author: "Tim Bielawa (@tbielawa) <tbielawa@redhat.com>" +''' + +EXAMPLES = ''' +# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now +- openshift_cert_expiry: + +# Expand the warning window to show certificates expiring within a year from now +- openshift_cert_expiry: warning_days=365 + +# Show expired, soon to expire (now + 30 days), and all other certificates examined +- openshift_cert_expiry: show_all=true +''' + + +# We only need this for one thing, we don't care if it doesn't have +# that many public methods +# +# pylint: disable=too-few-public-methods +class FakeSecHead(object): +    """etcd does not begin their config file with an opening [section] as +required by the Python ConfigParser module. 
We hack around it by +slipping one in ourselves prior to parsing. + +Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 +    """ +    def __init__(self, fp): +        self.fp = fp +        self.sechead = '[ETCD]\n' + +    def readline(self): +        """Make this look like a file-type object""" +        if self.sechead: +            try: +                return self.sechead +            finally: +                self.sechead = None +        else: +            return self.fp.readline() + + +###################################################################### +def filter_paths(path_list): +    """`path_list` - A list of file paths to check. Only files which exist +will be returned +    """ +    return [p for p in path_list if os.path.exists(os.path.realpath(p))] + + +def load_and_handle_cert(cert_string, now, base64decode=False): +    """Load a certificate, split off the good parts, and return some +useful data + +Params: + +- `cert_string` (string) - a certificate loaded into a string object +- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against +- `base64decode` (bool) - run .decode('base64') on the input? + +Returns: +A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining) + +    """ +    if base64decode: +        _cert_string = cert_string.decode('base-64') +    else: +        _cert_string = cert_string + +    cert_loaded = OpenSSL.crypto.load_certificate( +        OpenSSL.crypto.FILETYPE_PEM, _cert_string) + +    ###################################################################### +    # Read all possible names from the cert +    cert_subjects = [] +    for name, value in cert_loaded.get_subject().get_components(): +        cert_subjects.append('{}:{}'.format(name, value)) + +    # To read SANs from a cert we must read the subjectAltName +    # extension from the X509 Object. 
What makes this more difficult +    # is that pyOpenSSL does not give extensions as a list, nor does +    # it provide a count of all loaded extensions. +    # +    # Rather, extensions are REQUESTED by index. We must iterate over +    # all extensions until we find the one called 'subjectAltName'. If +    # we don't find that extension we'll eventually request an +    # extension at an index where no extension exists (IndexError is +    # raised). When that happens we know that the cert has no SANs so +    # we break out of the loop. +    i = 0 +    checked_all_extensions = False +    while not checked_all_extensions: +        try: +            # Read the extension at index 'i' +            ext = cert_loaded.get_extension(i) +        except IndexError: +            # We tried to read an extension but it isn't there, that +            # means we ran out of extensions to check. Abort +            san = None +            checked_all_extensions = True +        else: +            # We were able to load the extension at index 'i' +            if ext.get_short_name() == 'subjectAltName': +                san = ext +                checked_all_extensions = True +            else: +                # Try reading the next extension +                i += 1 + +    if san is not None: +        # The X509Extension object for subjectAltName prints as a +        # string with the alt names separated by a comma and a +        # space. 
Split the string by ', ' and then add our new names
+        # to the list of existing names
+        cert_subjects.extend(str(san).split(', '))
+
+    cert_subject = ', '.join(cert_subjects)
+    ######################################################################
+
+    # Grab the expiration date
+    cert_expiry = cert_loaded.get_notAfter()
+    cert_expiry_date = datetime.datetime.strptime(
+        cert_expiry,
+        # example get_notAfter() => 20180922170439Z
+        '%Y%m%d%H%M%SZ')
+
+    time_remaining = cert_expiry_date - now
+
+    return (cert_subject, cert_expiry_date, time_remaining)
+
+
+def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list):
+    """Given metadata about a certificate under examination, classify it
+    into one of three categories, 'ok', 'warning', and 'expired'.
+
+Params:
+
+- `cert_meta` dict - A dict with certificate metadata. Required fields
+  include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'.
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
+- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires
+- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is
+- `cert_list` list - A list to shove the classified cert into
+
+Return:
+- `cert_list` - The updated list of classified certificates
+    """
+    expiry_str = str(cert_meta['expiry'])
+    # Categorization
+    if cert_meta['expiry'] < now:
+        # This already expired, must NOTIFY
+        cert_meta['health'] = 'expired'
+    elif time_remaining < expire_window:
+        # WARN about this upcoming expiration
+        cert_meta['health'] = 'warning'
+    else:
+        # Not expired or about to expire
+        cert_meta['health'] = 'ok'
+
+    cert_meta['expiry'] = expiry_str
+    cert_list.append(cert_meta)
+    return cert_list
+
+
+def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, 
registry_certs):
+    """Calculate the summary text for when the module finishes
+running. This includes counts of each classification and what have
+you.
+
+Params:
+
+- `certificates` (list of dicts) - Processed `expire_check_result`
+  dicts with filled in `health` keys for system certificates.
+- `kubeconfigs` - as above for kubeconfigs
+- `etcd_certs` - as above for etcd certs
+
+Return:
+
+- `summary_results` (dict) - Counts of each cert type classification
+  and total items examined.
+    """
+    items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs
+
+    summary_results = {
+        'system_certificates': len(certificates),
+        'kubeconfig_certificates': len(kubeconfigs),
+        'etcd_certificates': len(etcd_certs),
+        'router_certs': len(router_certs),
+        'registry_certs': len(registry_certs),
+        'total': len(items),
+        'ok': 0,
+        'warning': 0,
+        'expired': 0
+    }
+
+    summary_results['expired'] = len([c for c in items if c['health'] == 'expired'])
+    summary_results['warning'] = len([c for c in items if c['health'] == 'warning'])
+    summary_results['ok'] = len([c for c in items if c['health'] == 'ok'])
+
+    return summary_results
+
+
+######################################################################
+# This is our module MAIN function after all, so there's bound to be a
+# lot of code bundled up into one block
+#
+# pylint: disable=too-many-locals,too-many-statements,too-many-branches
+def main():
+    """This module examines certificates (in various forms) which compose
+an OpenShift Container Platform cluster
+    """
+
+    module = AnsibleModule(
+        argument_spec=dict(
+            config_base=dict(
+                required=False,
+                default="/etc/origin",
+                type='str'),
+            warning_days=dict(
+                required=False,
+                default=30,
+                type='int'),
+            
show_all=dict( +                required=False, +                default=False, +                type='bool') +        ), +        supports_check_mode=True, +    ) + +    # Basic scaffolding for OpenShift specific certs +    openshift_base_config_path = module.params['config_base'] +    openshift_master_config_path = os.path.normpath( +        os.path.join(openshift_base_config_path, "master/master-config.yaml") +    ) +    openshift_node_config_path = os.path.normpath( +        os.path.join(openshift_base_config_path, "node/node-config.yaml") +    ) +    openshift_cert_check_paths = [ +        openshift_master_config_path, +        openshift_node_config_path, +    ] + +    # Paths for Kubeconfigs. Additional kubeconfigs are conditionally +    # checked later in the code +    master_kube_configs = ['admin', 'openshift-master', +                           'openshift-node', 'openshift-router', +                           'openshift-registry'] + +    kubeconfig_paths = [] +    for m_kube_config in master_kube_configs: +        kubeconfig_paths.append( +            os.path.normpath( +                os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config) +            ) +        ) + +    # Validate some paths we have the ability to do ahead of time +    openshift_cert_check_paths = filter_paths(openshift_cert_check_paths) +    kubeconfig_paths = filter_paths(kubeconfig_paths) + +    # etcd, where do you hide your certs? 
Used when parsing etcd.conf +    etcd_cert_params = [ +        "ETCD_CA_FILE", +        "ETCD_CERT_FILE", +        "ETCD_PEER_CA_FILE", +        "ETCD_PEER_CERT_FILE", +    ] + +    # Expiry checking stuff +    now = datetime.datetime.now() +    # todo, catch exception for invalid input and return a fail_json +    warning_days = int(module.params['warning_days']) +    expire_window = datetime.timedelta(days=warning_days) + +    # Module stuff +    # +    # The results of our cert checking to return from the task call +    check_results = {} +    check_results['meta'] = {} +    check_results['meta']['warning_days'] = warning_days +    check_results['meta']['checked_at_time'] = str(now) +    check_results['meta']['warn_before_date'] = str(now + expire_window) +    check_results['meta']['show_all'] = str(module.params['show_all']) +    # All the analyzed certs accumulate here +    ocp_certs = [] + +    ###################################################################### +    # Sure, why not? Let's enable check mode. +    if module.check_mode: +        check_results['ocp_certs'] = [] +        module.exit_json( +            check_results=check_results, +            msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'], +            rc=0, +            changed=False +        ) + +    ###################################################################### +    # Check for OpenShift Container Platform specific certs +    ###################################################################### +    for os_cert in filter_paths(openshift_cert_check_paths): +        # Open up that config file and locate the cert and CA +        with open(os_cert, 'r') as fp: +            cert_meta = {} +            cfg = yaml.load(fp) +            # cert files are specified in parsed `fp` as relative to the path +            # of the original config file. 
'master-config.yaml' with certFile +            # = 'foo.crt' implies that 'foo.crt' is in the same +            # directory. certFile = '../foo.crt' is in the parent directory. +            cfg_path = os.path.dirname(fp.name) +            cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile']) +            cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA']) + +        ###################################################################### +        # Load the certificate and the CA, parse their expiration dates into +        # datetime objects so we can manipulate them later +        for _, v in cert_meta.iteritems(): +            with open(v, 'r') as fp: +                cert = fp.read() +                cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now) + +                expire_check_result = { +                    'cert_cn': cert_subject, +                    'path': fp.name, +                    'expiry': cert_expiry_date, +                    'days_remaining': time_remaining.days, +                    'health': None, +                } + +                classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs) + +    ###################################################################### +    # /Check for OpenShift Container Platform specific certs +    ###################################################################### + +    ###################################################################### +    # Check service Kubeconfigs +    ###################################################################### +    kubeconfigs = [] + +    # There may be additional kubeconfigs to check, but their naming +    # is less predictable than the ones we've already assembled. + +    try: +        # Try to read the standard 'node-config.yaml' file to check if +        # this host is a node. 
+        with open(openshift_node_config_path, 'r') as fp: +            cfg = yaml.load(fp) + +        # OK, the config file exists, therefore this is a +        # node. Nodes have their own kubeconfig files to +        # communicate with the master API. Let's read the relative +        # path to that file from the node config. +        node_masterKubeConfig = cfg['masterKubeConfig'] +        # As before, the path to the 'masterKubeConfig' file is +        # relative to `fp` +        cfg_path = os.path.dirname(fp.name) +        node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) + +        with open(node_kubeconfig, 'r') as fp: +            # Read in the nodes kubeconfig file and grab the good stuff +            cfg = yaml.load(fp) + +        c = cfg['users'][0]['user']['client-certificate-data'] +        (cert_subject, +         cert_expiry_date, +         time_remaining) = load_and_handle_cert(c, now, base64decode=True) + +        expire_check_result = { +            'cert_cn': cert_subject, +            'path': fp.name, +            'expiry': cert_expiry_date, +            'days_remaining': time_remaining.days, +            'health': None, +        } + +        classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) +    except IOError: +        # This is not a node +        pass + +    for kube in filter_paths(kubeconfig_paths): +        with open(kube, 'r') as fp: +            # TODO: Maybe consider catching exceptions here? +            cfg = yaml.load(fp) + +        # Per conversation, "the kubeconfigs you care about: +        # admin, router, registry should all be single +        # value". Following that advice we only grab the data for +        # the user at index 0 in the 'users' list. There should +        # not be more than one user. 
+        c = cfg['users'][0]['user']['client-certificate-data'] +        (cert_subject, +         cert_expiry_date, +         time_remaining) = load_and_handle_cert(c, now, base64decode=True) + +        expire_check_result = { +            'cert_cn': cert_subject, +            'path': fp.name, +            'expiry': cert_expiry_date, +            'days_remaining': time_remaining.days, +            'health': None, +        } + +        classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + +    ###################################################################### +    # /Check service Kubeconfigs +    ###################################################################### + +    ###################################################################### +    # Check etcd certs +    ###################################################################### +    # Some values may be duplicated, make this a set for now so we +    # unique them all +    etcd_certs_to_check = set([]) +    etcd_certs = [] +    etcd_cert_params.append('dne') +    try: +        with open('/etc/etcd/etcd.conf', 'r') as fp: +            etcd_config = ConfigParser.ConfigParser() +            etcd_config.readfp(FakeSecHead(fp)) + +        for param in etcd_cert_params: +            try: +                etcd_certs_to_check.add(etcd_config.get('ETCD', param)) +            except ConfigParser.NoOptionError: +                # That parameter does not exist, oh well... 
+                pass +    except IOError: +        # No etcd to see here, move along +        pass + +    for etcd_cert in filter_paths(etcd_certs_to_check): +        with open(etcd_cert, 'r') as fp: +            c = fp.read() +            (cert_subject, +             cert_expiry_date, +             time_remaining) = load_and_handle_cert(c, now) + +            expire_check_result = { +                'cert_cn': cert_subject, +                'path': fp.name, +                'expiry': cert_expiry_date, +                'days_remaining': time_remaining.days, +                'health': None, +            } + +            classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs) + +    ###################################################################### +    # /Check etcd certs +    ###################################################################### + +    ###################################################################### +    # Check router/registry certs +    # +    # These are saved as secrets in etcd. That means that we can not +    # simply read a file to grab the data. Instead we're going to +    # subprocess out to the 'oc get' command. On non-masters this +    # command will fail, that is expected so we catch that exception. 
+    ###################################################################### +    router_certs = [] +    registry_certs = [] + +    ###################################################################### +    # First the router certs +    try: +        router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(), +                                              stdout=subprocess.PIPE) +        router_ds = yaml.load(router_secrets_raw.communicate()[0]) +        router_c = router_ds['data']['tls.crt'] +        router_path = router_ds['metadata']['selfLink'] +    except TypeError: +        # YAML couldn't load the result, this is not a master +        pass +    except OSError: +        # The OC command doesn't exist here. Move along. +        pass +    else: +        (cert_subject, +         cert_expiry_date, +         time_remaining) = load_and_handle_cert(router_c, now, base64decode=True) + +        expire_check_result = { +            'cert_cn': cert_subject, +            'path': router_path, +            'expiry': cert_expiry_date, +            'days_remaining': time_remaining.days, +            'health': None, +        } + +        classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs) + +    ###################################################################### +    # Now for registry +    try: +        registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(), +                                                stdout=subprocess.PIPE) +        registry_ds = yaml.load(registry_secrets_raw.communicate()[0]) +        registry_c = registry_ds['data']['registry.crt'] +        registry_path = registry_ds['metadata']['selfLink'] +    except TypeError: +        # YAML couldn't load the result, this is not a master +        pass +    except OSError: +        # The OC command doesn't exist here. Move along. 
+        pass +    else: +        (cert_subject, +         cert_expiry_date, +         time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True) + +        expire_check_result = { +            'cert_cn': cert_subject, +            'path': registry_path, +            'expiry': cert_expiry_date, +            'days_remaining': time_remaining.days, +            'health': None, +        } + +        classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs) + +    ###################################################################### +    # /Check router/registry certs +    ###################################################################### + +    res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs) + +    msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( +        count=res['total'], +        exp=res['expired'], +        warn=res['warning'], +        ok=res['ok'], +        window=int(module.params['warning_days']), +    ) + +    # By default we only return detailed information about expired or +    # warning certificates. If show_all is true then we will print all +    # the certificates examined. 
+    if not module.params['show_all']: +        check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']] +        check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']] +        check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']] +        check_results['registry'] = [crt for crt in registry_certs if crt['health'] in ['expired', 'warning']] +        check_results['router'] = [crt for crt in router_certs if crt['health'] in ['expired', 'warning']] +    else: +        check_results['ocp_certs'] = ocp_certs +        check_results['kubeconfigs'] = kubeconfigs +        check_results['etcd'] = etcd_certs +        check_results['registry'] = registry_certs +        check_results['router'] = router_certs + +    # Sort the final results to report in order of ascending safety +    # time. That is to say, the certificates which will expire sooner +    # will be at the front of the list and certificates which will +    # expire later are at the end. Router and registry certs should be +    # limited to just 1 result, so don't bother sorting those. 
+    check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) +    check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) +    check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + +    # This module will never change anything, but we might want to +    # change the return code parameter if there is some catastrophic +    # error we noticed earlier +    module.exit_json( +        check_results=check_results, +        summary=res, +        msg=msg, +        rc=0, +        changed=False +    ) + +###################################################################### +# It's just the way we do things in Ansible. So disable this warning +# +# pylint: disable=wrong-import-position,import-error +from ansible.module_utils.basic import AnsibleModule +if __name__ == '__main__': +    main() diff --git a/roles/openshift_certificate_expiry/meta/main.yml b/roles/openshift_certificate_expiry/meta/main.yml new file mode 100644 index 000000000..c13b29ba5 --- /dev/null +++ b/roles/openshift_certificate_expiry/meta/main.yml @@ -0,0 +1,16 @@ +--- +galaxy_info: +  author: Tim Bielawa +  description: OpenShift Certificate Expiry Checker +  company: Red Hat, Inc. 
+  license: Apache License, Version 2.0 +  min_ansible_version: 2.1 +  version: 1.0 +  platforms: +  - name: EL +    versions: +    - 7 +  categories: +  - cloud +  - system +dependencies: [] diff --git a/roles/openshift_certificate_expiry/tasks/main.yml b/roles/openshift_certificate_expiry/tasks/main.yml new file mode 100644 index 000000000..139d5de6e --- /dev/null +++ b/roles/openshift_certificate_expiry/tasks/main.yml @@ -0,0 +1,30 @@ +--- +- name: Check cert expirys on host +  openshift_cert_expiry: +    warning_days: "{{ openshift_certificate_expiry_warning_days|int }}" +    config_base: "{{ openshift_certificate_expiry_config_base }}" +    show_all: "{{ openshift_certificate_expiry_show_all|bool }}" +  register: check_results + +- name: Generate expiration report HTML +  become: no +  run_once: yes +  template: +    src: cert-expiry-table.html.j2 +    dest: "{{ openshift_certificate_expiry_html_report_path }}" +  delegate_to: localhost +  when: "{{ openshift_certificate_expiry_generate_html_report|bool }}" + +- name: Generate the result JSON string +  run_once: yes +  set_fact: json_result_string="{{ hostvars|oo_cert_expiry_results_to_json(play_hosts) }}" +  when: "{{ openshift_certificate_expiry_save_json_results|bool }}" + +- name: Generate results JSON file +  become: no +  run_once: yes +  template: +    src: save_json_results.j2 +    dest: "{{ openshift_certificate_expiry_json_results_path }}" +  delegate_to: localhost +  when: "{{ openshift_certificate_expiry_save_json_results|bool }}" diff --git a/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 new file mode 100644 index 000000000..b05110336 --- /dev/null +++ b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 @@ -0,0 +1,124 @@ +<!DOCTYPE html> +<html> +  <head> +    <meta charset="UTF-8" /> +    <title>OCP Certificate Expiry Report</title> +    {# For fancy icons and a pleasing font #} 
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" /> +    <link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,700" rel="stylesheet" /> +    <style type="text/css"> +      body { +      font-family: 'Source Sans Pro', sans-serif; +      margin-left: 50px; +      margin-right: 50px; +      margin-bottom: 20px; +      padding-top: 70px; +      } +      table { +      border-collapse: collapse; +      margin-bottom: 20px; +      } +      table, th, td { +      border: 1px solid black; +      } +      th, td { +      padding: 5px; +      } +      .cert-kind { +      margin-top: 5px; +      margin-bottom: 5px; +      } +      footer { +      font-size: small; +      text-align: center; +      } +      tr.odd { +      background-color: #f2f2f2; +      } +    </style> +  </head> +  <body> +    <nav class="navbar navbar-default navbar-fixed-top"> +      <div class="container-fluid"> +        <div class="navbar-header"> +          <a class="navbar-brand" href="#">OCP Certificate Expiry Report</a> +        </div> +        <div class="collapse navbar-collapse"> +          <p class="navbar-text navbar-right"> +	    <a href="https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html" +	       target="_blank" +	       class="navbar-link"> +	       <i class="glyphicon glyphicon-book"></i> Redeploying Certificates +	    </a> +	  </p> +        </div> +      </div> +    </nav> + +    {# Each host has a header and table to itself #} +    {% for host in play_hosts %} +      <h1>{{ host }}</h1> + +      <p> +        {{ hostvars[host].check_results.msg }} +      </p> +      <ul> +        <li><b>Expirations checked at:</b> {{ hostvars[host].check_results.check_results.meta.checked_at_time }}</li> +        <li><b>Warn after date:</b> {{ hostvars[host].check_results.check_results.meta.warn_before_date }}</li> +      </ul> + +      <table border="1" width="100%"> +        {# 
These are hard-coded right now, but should be grabbed dynamically from the registered results #} +        {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs', 'router', 'registry'] -%} +          <tr> +            <th colspan="6" style="text-align:center"><h2 class="cert-kind">{{ kind }}</h2></th> +          </tr> + +          <tr> +            <th> </th> +            <th style="width:33%">Certificate Common/Alt Name(s)</th> +            <th>Health</th> +            <th>Days Remaining</th> +            <th>Expiration Date</th> +            <th>Path</th> +          </tr> + +          {# A row for each certificate examined #} +          {%- for v in hostvars[host].check_results.check_results[kind] -%} + +            {# Let's add some flair and show status visually with fancy icons #} +            {% if v.health == 'ok' %} +              {% set health_icon = 'glyphicon glyphicon-ok' %} +            {% elif v.health == 'warning' %} +              {% set health_icon = 'glyphicon glyphicon-alert' %} +            {% else %} +              {% set health_icon = 'glyphicon glyphicon-remove' %} +            {% endif %} + +            <tr class="{{ loop.cycle('odd', 'even') }}"> +              <td style="text-align:center"><i class="{{ health_icon }}"></i></td> +              <td style="width:33%">{{ v.cert_cn }}</td> +              <td>{{ v.health }}</td> +              <td>{{ v.days_remaining }}</td> +              <td>{{ v.expiry }}</td> +              <td>{{ v.path }}</td> +            </tr> +          {% endfor %} +          {# end row generation per cert of this type #} +        {% endfor %} +        {# end generation for each kind of cert block #} +      </table> +      <hr /> +    {% endfor %} +    {# end section generation for each host #} + +    <footer> +      <p> +        Expiration report generated by <a href="https://github.com/openshift/openshift-ansible" target="_blank">openshift-ansible</a> +      </p> +      <p> +        Status icons from bootstrap/glyphicon 
+      </p> +    </footer> +  </body> +</html> diff --git a/roles/openshift_certificate_expiry/templates/save_json_results.j2 b/roles/openshift_certificate_expiry/templates/save_json_results.j2 new file mode 100644 index 000000000..c1173d9ea --- /dev/null +++ b/roles/openshift_certificate_expiry/templates/save_json_results.j2 @@ -0,0 +1 @@ +{{ json_result_string | to_nice_json(indent=2)}} diff --git a/roles/openshift_cli/library/openshift_container_binary_sync.py b/roles/openshift_cli/library/openshift_container_binary_sync.py index fd290c6fc..9ff738d14 100644 --- a/roles/openshift_cli/library/openshift_container_binary_sync.py +++ b/roles/openshift_cli/library/openshift_container_binary_sync.py @@ -83,8 +83,13 @@ class BinarySyncer(object):      def _sync_symlink(self, binary_name, link_to):          """ Ensure the given binary name exists and links to the expected binary. """ + +        # The symlink we are creating:          link_path = os.path.join(self.bin_dir, binary_name) -        link_dest = os.path.join(self.bin_dir, binary_name) + +        # The expected file we should be linking to: +        link_dest = os.path.join(self.bin_dir, link_to) +          if not os.path.exists(link_path) or \                  not os.path.islink(link_path) or \                  os.path.realpath(link_path) != os.path.realpath(link_dest): diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 4e525a2da..c29df1873 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -30,7 +30,7 @@  - name: Create OpenShift registry    command: > -    {{ openshift.common.admin_binary }} registry --create +    {{ openshift.common.client_binary }} adm registry --create      --config={{ openshift_hosted_kubeconfig }}      {% if replicas > 1 -%}      --replicas={{ replicas }} diff --git a/roles/openshift_hosted/tasks/registry/secure.yml 
b/roles/openshift_hosted/tasks/registry/secure.yml index 664edef41..d2f6ba5f6 100644 --- a/roles/openshift_hosted/tasks/registry/secure.yml +++ b/roles/openshift_hosted/tasks/registry/secure.yml @@ -33,7 +33,7 @@  - name: Create registry certificates if they do not exist    command: > -    {{ openshift.common.admin_binary }} ca create-server-cert +    {{ openshift.common.client_binary }} adm ca create-server-cert      --signer-cert=/etc/origin/master/ca.crt      --signer-key=/etc/origin/master/ca.key      --signer-serial=/etc/origin/master/ca.serial.txt diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 0cad19c34..b944fa522 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -48,7 +48,7 @@  - name: Create OpenShift router    command: > -    {{ openshift.common.admin_binary }} router --create +    {{ openshift.common.client_binary }} adm router --create      --config={{ openshift_hosted_kubeconfig }}      {% if replicas > 1 -%}      --replicas={{ replicas }} @@ -73,7 +73,7 @@      {% if openshift.hosted.router.name | default(none) is not none -%}      {{ openshift.hosted.router.name }}      {% endif -%} -     +    register: openshift_hosted_router_results    changed_when: "'service exists' not in openshift_hosted_router_results.stdout"    failed_when: "openshift_hosted_router_results.rc != 0 and 'service exists' not in openshift_hosted_router_results.stdout and 'deployment_config' not in openshift_hosted_router_results.stderr and 'service' not in openshift_hosted_router_results.stderr" diff --git a/roles/openshift_hosted_logging/tasks/deploy_logging.yaml b/roles/openshift_hosted_logging/tasks/deploy_logging.yaml index 65af1c08e..c8d376194 100644 --- a/roles/openshift_hosted_logging/tasks/deploy_logging.yaml +++ b/roles/openshift_hosted_logging/tasks/deploy_logging.yaml @@ -25,7 +25,7 @@    - name: "Create logging project"      command: > -  
    {{ openshift.common.admin_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig new-project logging +      {{ openshift.common.client_binary }} adm --config={{ mktemp.stdout }}/admin.kubeconfig new-project logging      when: logging_project_result.stdout == ""    - name: "Changing projects" @@ -51,19 +51,19 @@    - name: "Set permissions for logging-deployer service account"      command: > -      {{ openshift.common.admin_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig policy add-cluster-role-to-user oauth-editor system:serviceaccount:logging:logging-deployer +      {{ openshift.common.client_binary }} adm --config={{ mktemp.stdout }}/admin.kubeconfig policy add-cluster-role-to-user oauth-editor system:serviceaccount:logging:logging-deployer      register: permiss_output      failed_when: "permiss_output.rc == 1 and 'exists' not in permiss_output.stderr"    - name: "Set permissions for fluentd"      command: > -      {{ openshift.common.admin_binary}} policy add-scc-to-user privileged system:serviceaccount:logging:aggregated-logging-fluentd +      {{ openshift.common.client_binary }} adm policy add-scc-to-user privileged system:serviceaccount:logging:aggregated-logging-fluentd      register: fluentd_output      failed_when: "fluentd_output.rc == 1 and 'exists' not in fluentd_output.stderr"    - name: "Set additional permissions for fluentd"      command: > -      {{ openshift.common.admin_binary}} policy add-cluster-role-to-user cluster-reader system:serviceaccount:logging:aggregated-logging-fluentd +      {{ openshift.common.client_binary }} adm policy add-cluster-role-to-user cluster-reader system:serviceaccount:logging:aggregated-logging-fluentd      register: fluentd2_output      failed_when: "fluentd2_output.rc == 1 and 'exists' not in fluentd2_output.stderr" diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index d1cc5b274..28e4e46e9 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ 
b/roles/openshift_manage_node/tasks/main.yml @@ -26,7 +26,7 @@  - name: Set node schedulability    command: > -    {{ openshift.common.admin_binary }} manage-node {{ hostvars[item].openshift.node.nodename }} --schedulable={{ 'true' if hostvars[item].openshift.node.schedulable | bool else 'false' }} +    {{ openshift.common.client_binary }} adm manage-node {{ hostvars[item].openshift.node.nodename }} --schedulable={{ 'true' if hostvars[item].openshift.node.schedulable | bool else 'false' }}      --config={{ openshift_manage_node_kubeconfig }}      -n default    with_items: "{{ openshift_nodes }}" diff --git a/roles/openshift_manageiq/tasks/main.yaml b/roles/openshift_manageiq/tasks/main.yaml index b0cc99c80..bdaf64b3f 100644 --- a/roles/openshift_manageiq/tasks/main.yaml +++ b/roles/openshift_manageiq/tasks/main.yaml @@ -10,7 +10,7 @@  - name: Add Management Infrastructure project    command: > -    {{ openshift.common.admin_binary }} new-project +    {{ openshift.common.client_binary }} adm new-project      management-infra      --description="Management Infrastructure"      --config={{manage_iq_tmp_conf}} @@ -52,7 +52,7 @@  - name: Configure role/user permissions    command: > -    {{ openshift.common.admin_binary }} {{item}} +    {{ openshift.common.client_binary }} adm {{item}}      --config={{manage_iq_tmp_conf}}    with_items: "{{manage_iq_tasks}}"    register: osmiq_perm_task @@ -61,7 +61,7 @@  - name: Configure 3_2 role/user permissions    command: > -    {{ openshift.common.admin_binary }} {{item}} +    {{ openshift.common.client_binary }} adm {{item}}      --config={{manage_iq_tmp_conf}}    with_items: "{{manage_iq_openshift_3_2_tasks}}"    register: osmiq_perm_3_2_task diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 1a59717c7..1d6758c4a 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -57,7 +57,7 @@  - name: Create the policy file if it does not already exist 
   command: > -    {{ openshift.common.admin_binary }} create-bootstrap-policy-file +    {{ openshift.common.client_binary }} adm create-bootstrap-policy-file        --filename={{ openshift_master_policy }}    args:      creates: "{{ openshift_master_policy }}" diff --git a/roles/openshift_master_certificates/tasks/main.yml b/roles/openshift_master_certificates/tasks/main.yml index ffde59358..e9b7de330 100644 --- a/roles/openshift_master_certificates/tasks/main.yml +++ b/roles/openshift_master_certificates/tasks/main.yml @@ -52,7 +52,7 @@  - name: Create the master certificates if they do not already exist    command: > -    {{ openshift.common.admin_binary }} create-master-certs +    {{ openshift.common.client_binary }} adm create-master-certs      {% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %}      --certificate-authority {{ named_ca_certificate }}      {% endfor %} diff --git a/roles/openshift_metrics/tasks/install.yml b/roles/openshift_metrics/tasks/install.yml index 9c4eb22d7..4dabd314f 100644 --- a/roles/openshift_metrics/tasks/install.yml +++ b/roles/openshift_metrics/tasks/install.yml @@ -30,7 +30,7 @@  - name: Add edit permission to the openshift-infra project to metrics-deployer SA    command: > -    {{ openshift.common.admin_binary }} +    {{ openshift.common.client_binary }} adm      --config={{ openshift_metrics_kubeconfig }}      --namespace openshift-infra      policy add-role-to-user edit @@ -48,7 +48,7 @@  - name: Add cluster-reader permission to the openshift-infra project to heapster SA    command: > -    {{ openshift.common.admin_binary }} +    {{ openshift.common.client_binary }} adm      --config={{ openshift_metrics_kubeconfig }}      --namespace openshift-infra      policy add-cluster-role-to-user cluster-reader diff --git a/roles/openshift_node_certificates/tasks/main.yml b/roles/openshift_node_certificates/tasks/main.yml index 80ab4bb1d..69bcd3668 100644 --- 
a/roles/openshift_node_certificates/tasks/main.yml +++ b/roles/openshift_node_certificates/tasks/main.yml @@ -44,7 +44,7 @@  - name: Generate the node client config    command: > -    {{ openshift.common.admin_binary }} create-api-client-config +    {{ openshift.common.client_binary }} adm create-api-client-config        {% for named_ca_certificate in hostvars[openshift_ca_host].openshift.master.named_certificates | default([]) | oo_collect('cafile') %}        --certificate-authority {{ named_ca_certificate }}        {% endfor %} @@ -63,7 +63,7 @@  - name: Generate the node server certificate    command: > -    {{ openshift.common.admin_binary }} ca create-server-cert +    {{ openshift.common.client_binary }} adm ca create-server-cert        --cert={{ openshift_node_generated_config_dir }}/server.crt        --key={{ openshift_generated_configs_dir }}/node-{{ openshift.common.hostname }}/server.key        --overwrite=true diff --git a/roles/openshift_projects/tasks/main.yml b/roles/openshift_projects/tasks/main.yml index 62a357cf7..30d58afd3 100644 --- a/roles/openshift_projects/tasks/main.yml +++ b/roles/openshift_projects/tasks/main.yml @@ -20,7 +20,7 @@  - name: Create projects    command: > -    {{ openshift.common.admin_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig +    {{ openshift.common.client_binary }} adm --config={{ mktemp.stdout }}/admin.kubeconfig      new-project {{ item.item.key }}      {% if item.item.value.default_node_selector | default(none) != none %}      {{ '--node-selector=' ~ item.item.value.default_node_selector }} diff --git a/roles/openshift_serviceaccounts/tasks/main.yml b/roles/openshift_serviceaccounts/tasks/main.yml index e90384d37..1ff9e6dcb 100644 --- a/roles/openshift_serviceaccounts/tasks/main.yml +++ b/roles/openshift_serviceaccounts/tasks/main.yml @@ -26,7 +26,7 @@  - name: Grant the user access to the appropriate scc    command: > -      {{ openshift.common.admin_binary }} policy add-scc-to-user +      {{ 
openshift.common.client_binary }} adm policy add-scc-to-user        {{ item.1.item }} system:serviceaccount:{{ openshift_serviceaccounts_namespace }}:{{ item.0 }}    when: "openshift.common.version_gte_3_1_or_1_1 and item.1.rc == 0 and 'system:serviceaccount:{{ openshift_serviceaccounts_namespace }}:{{ item.0 }}' not in {{ (item.1.stdout | from_yaml).users | default([]) }}"    with_nested: diff --git a/utils/Makefile b/utils/Makefile index 59aff92fd..62f08f74b 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -31,6 +31,8 @@ ASCII2MAN = a2x -D $(dir $@) -d manpage -f manpage $<  MANPAGES := docs/man/man1/atomic-openshift-installer.1  VERSION := 1.3 +PEPEXCLUDES := E501,E121,E124 +  sdist: clean  	python setup.py sdist  	rm -fR $(SHORTNAME).egg-info @@ -80,7 +82,7 @@ ci-pylint:  	@echo "#############################################"  	@echo "# Running PyLint Tests in virtualenv"  	@echo "#############################################" -	. $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py +	. $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py  ci-list-deps:  	@echo "#############################################" @@ -94,13 +96,17 @@ ci-pyflakes:  	@echo "#################################################"  	. $(NAME)env/bin/activate && pyflakes src/ooinstall/*.py  	. $(NAME)env/bin/activate && pyflakes ../callback_plugins/openshift_quick_installer.py +	. 
$(NAME)env/bin/activate && pyflakes ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py  ci-pep8:  	@echo "#############################################"  	@echo "# Running PEP8 Compliance Tests in virtualenv"  	@echo "#############################################" -	. $(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 src/$(SHORTNAME)/ -	. $(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 ../callback_plugins/openshift_quick_installer.py +	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) src/$(SHORTNAME)/ +	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) ../callback_plugins/openshift_quick_installer.py +# This one excludes E402 because it is an ansible module and the +# boilerplate import statement is expected to be at the bottom +	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES),E402 ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py  ci: clean virtualenv ci-list-deps ci-pep8 ci-pylint ci-pyflakes ci-unittests  	: diff --git a/utils/test-requirements.txt b/utils/test-requirements.txt index f2216a177..af91ab6a7 100644 --- a/utils/test-requirements.txt +++ b/utils/test-requirements.txt @@ -9,3 +9,4 @@ flake8  PyYAML  click  backports.functools_lru_cache +pyOpenSSL  | 
