diff options
Diffstat (limited to 'roles')
110 files changed, 1604 insertions, 1346 deletions
diff --git a/roles/cockpit/defaults/main.yml b/roles/cockpit/defaults/main.yml new file mode 100644 index 000000000..cbe5bb92b --- /dev/null +++ b/roles/cockpit/defaults/main.yml @@ -0,0 +1,8 @@ +--- +r_cockpit_firewall_enabled: True +r_cockpit_use_firewalld: False + +r_cockpit_os_firewall_deny: [] +r_cockpit_os_firewall_allow: +- service: cockpit-ws + port: 9090/tcp diff --git a/roles/cockpit/meta/main.yml b/roles/cockpit/meta/main.yml index 0f507e75e..8c0ed3cb8 100644 --- a/roles/cockpit/meta/main.yml +++ b/roles/cockpit/meta/main.yml @@ -12,7 +12,4 @@ galaxy_info: categories: - cloud dependencies: -- role: os_firewall - os_firewall_allow: - - service: cockpit-ws - port: 9090/tcp +- role: lib_os_firewall diff --git a/roles/cockpit/tasks/firewall.yml b/roles/cockpit/tasks/firewall.yml new file mode 100644 index 000000000..e597ac84d --- /dev/null +++ b/roles/cockpit/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_cockpit_firewall_enabled | bool and not r_cockpit_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_cockpit_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_cockpit_os_firewall_deny }}" + +- when: r_cockpit_firewall_enabled | bool and r_cockpit_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_cockpit_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_cockpit_os_firewall_deny }}" diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml index 57f49ea11..066ee3f3b 100644 --- a/roles/cockpit/tasks/main.yml +++ b/roles/cockpit/tasks/main.yml @@ -1,4 +1,8 @@ --- +- name: setup firewall + include: firewall.yml + static: yes + - name: Install cockpit-ws package: name={{ item }} state=present with_items: diff --git a/roles/docker/meta/main.yml b/roles/docker/meta/main.yml index cd4083572..b773a417c 100644 --- a/roles/docker/meta/main.yml +++ b/roles/docker/meta/main.yml @@ -10,5 +10,4 @@ galaxy_info: versions: - 7 dependencies: -- role: os_firewall - role: lib_openshift diff --git a/roles/etcd/defaults/main.yaml b/roles/etcd/defaults/main.yaml index c0d1d5946..d12d7a358 100644 --- a/roles/etcd/defaults/main.yaml +++ b/roles/etcd/defaults/main.yaml @@ -1,4 +1,7 @@ --- +r_etcd_firewall_enabled: True +r_etcd_use_firewalld: False + etcd_initial_cluster_state: new etcd_initial_cluster_token: etcd-cluster-1 @@ -7,4 +10,13 @@ etcd_listen_peer_urls: "{{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_ etcd_advertise_client_urls: "{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}" etcd_listen_client_urls: "{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}" +etcd_client_port: 2379 +etcd_peer_port: 2380 + etcd_systemd_dir: "/etc/systemd/system/{{ etcd_service }}.service.d" +r_etcd_os_firewall_deny: [] +r_etcd_os_firewall_allow: +- service: etcd + port: "{{etcd_client_port}}/tcp" +- service: etcd peering + port: "{{ etcd_peer_port }}/tcp" diff --git a/roles/etcd/meta/main.yml b/roles/etcd/meta/main.yml index 689c07a84..9a955c822 100644 --- a/roles/etcd/meta/main.yml +++ b/roles/etcd/meta/main.yml @@ -17,11 +17,6 @@ galaxy_info: - system dependencies: - role: lib_openshift -- role: os_firewall - os_firewall_allow: - - service: etcd - port: "{{etcd_client_port}}/tcp" - - service: etcd peering - port: "{{ etcd_peer_port }}/tcp" +- role: lib_os_firewall - role: etcd_server_certificates - role: etcd_common diff --git a/roles/etcd/tasks/firewall.yml b/roles/etcd/tasks/firewall.yml new file mode 100644 index 000000000..4d0f6290a --- /dev/null +++ b/roles/etcd/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_etcd_firewall_enabled | bool and not r_etcd_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_etcd_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_etcd_os_firewall_deny }}" + +- when: r_etcd_firewall_enabled | bool and r_etcd_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_etcd_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_etcd_os_firewall_deny }}" diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 8c2f392ee..78e543ef1 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -6,6 +6,10 @@ etcd_hostname: "{{ etcd_hostname }}" etcd_ip: "{{ etcd_ip }}" +- name: setup firewall + include: firewall.yml + static: yes + - name: Install etcd package: name=etcd{{ '-' + etcd_version if etcd_version is defined else '' }} state=present when: not etcd_is_containerized | bool diff --git a/roles/lib_os_firewall/README.md b/roles/lib_os_firewall/README.md new file mode 100644 index 000000000..ba8c84865 --- /dev/null +++ b/roles/lib_os_firewall/README.md @@ -0,0 +1,63 @@ +lib_os_firewall +=========== + +lib_os_firewall manages iptables firewall settings for a minimal use +case (Adding/Removing rules based on protocol and port number). + +Note: firewalld is not supported on Atomic Host +https://bugzilla.redhat.com/show_bug.cgi?id=1403331 + +Requirements +------------ + +Ansible 2.2 + +Role Variables +-------------- + +| Name | Default | | +|---------------------------|---------|----------------------------------------| +| os_firewall_allow | [] | List of service,port mappings to allow | +| os_firewall_deny | [] | List of service, port mappings to deny | + +Dependencies +------------ + +None. + +Example Playbook +---------------- + +Use iptables and open tcp ports 80 and 443: +``` +--- +- hosts: servers + vars: + os_firewall_use_firewalld: false + os_firewall_allow: + - service: httpd + port: 80/tcp + - service: https + port: 443/tcp + tasks: + - include_role: + name: lib_os_firewall + + - name: set allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + with_items: "{{ os_firewall_allow }}" +``` + + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ +Jason DeTiberus - jdetiber@redhat.com diff --git a/roles/os_firewall/library/os_firewall_manage_iptables.py b/roles/lib_os_firewall/library/os_firewall_manage_iptables.py index aeee3ede8..aeee3ede8 100755 --- a/roles/os_firewall/library/os_firewall_manage_iptables.py +++ b/roles/lib_os_firewall/library/os_firewall_manage_iptables.py diff --git a/roles/nuage_master/defaults/main.yml b/roles/nuage_master/defaults/main.yml new file mode 100644 index 000000000..ffab25775 --- /dev/null +++ b/roles/nuage_master/defaults/main.yml @@ -0,0 +1,10 @@ +--- +r_nuage_master_firewall_enabled: True +r_nuage_master_use_firewalld: False + +nuage_mon_rest_server_port: '9443' + +r_nuage_master_os_firewall_deny: [] +r_nuage_master_os_firewall_allow: +- service: openshift-monitor + port: "{{ nuage_mon_rest_server_port }}/tcp" diff --git a/roles/nuage_master/handlers/main.yaml b/roles/nuage_master/handlers/main.yaml index 162aaae1a..ad7bbb111 100644 --- a/roles/nuage_master/handlers/main.yaml +++ b/roles/nuage_master/handlers/main.yaml @@ -3,10 +3,6 @@ become: yes systemd: name=nuage-openshift-monitor state=restarted -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (not openshift_master_ha | bool) and (not master_service_status_changed | default(false)) - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted when: > diff --git a/roles/nuage_master/meta/main.yml b/roles/nuage_master/meta/main.yml index e3ed9ac71..3da340c85 100644 --- a/roles/nuage_master/meta/main.yml +++ b/roles/nuage_master/meta/main.yml @@ -16,8 +16,5 @@ dependencies: - role: nuage_ca - role: nuage_common - role: openshift_etcd_client_certificates -- role: os_firewall - role: lib_openshift - os_firewall_allow: - - service: openshift-monitor - port: "{{ nuage_mon_rest_server_port }}/tcp" +- role: lib_os_firewall diff --git a/roles/nuage_master/tasks/firewall.yml b/roles/nuage_master/tasks/firewall.yml new file mode 100644 index 000000000..0057dc9ab --- /dev/null +++ b/roles/nuage_master/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_nuage_master_firewall_enabled | bool and not r_nuage_master_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_nuage_master_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_nuage_master_os_firewall_deny }}" + +- when: r_nuage_master_firewall_enabled | bool and r_nuage_master_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_nuage_master_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_nuage_master_os_firewall_deny }}" diff --git a/roles/nuage_master/tasks/main.yaml b/roles/nuage_master/tasks/main.yaml index 4f8adb63e..d0363c981 100644 --- a/roles/nuage_master/tasks/main.yaml +++ b/roles/nuage_master/tasks/main.yaml @@ -1,4 +1,8 @@ --- +- name: setup firewall + include: firewall.yml + static: yes + - name: Create directory /usr/share/nuage-openshift-monitor become: yes file: path=/usr/share/nuage-openshift-monitor state=directory @@ -45,7 +49,6 @@ become: yes template: src=nuage-openshift-monitor.j2 dest=/usr/share/nuage-openshift-monitor/nuage-openshift-monitor.yaml owner=root mode=0644 notify: - - restart master - restart master api - restart master controllers - restart nuage-openshift-monitor diff --git a/roles/nuage_master/tasks/serviceaccount.yml b/roles/nuage_master/tasks/serviceaccount.yml index eee448e2c..fbf2c4f8d 100644 --- a/roles/nuage_master/tasks/serviceaccount.yml +++ b/roles/nuage_master/tasks/serviceaccount.yml @@ -1,26 +1,6 @@ --- -- name: Create temporary directory for admin kubeconfig - command: mktemp -u /tmp/openshift-ansible-XXXXXXX.kubeconfig - register: nuage_tmp_conf_mktemp - changed_when: False - run_once: True - delegate_to: "{{ nuage_ca_master }}" - -- set_fact: - nuage_tmp_conf: "{{ nuage_tmp_conf_mktemp.stdout }}" - run_once: True - delegate_to: "{{ nuage_ca_master }}" - -- name: Copy Configuration to temporary conf - command: > - cp {{ openshift.common.config_base }}/master/admin.kubeconfig {{nuage_tmp_conf}} - changed_when: false - run_once: True - delegate_to: "{{ nuage_ca_master }}" - - name: Create Admin Service Account oc_serviceaccount: - kubeconfig: "{{ openshift_master_config_dir }}/admin.kubeconfig" name: nuage namespace: default state: present @@ -28,13 +8,12 @@ delegate_to: "{{ nuage_ca_master }}" - name: Configure role/user permissions - command: > - {{ openshift.common.client_binary }} adm {{item}} - --config={{ nuage_tmp_conf }} - with_items: "{{nuage_tasks}}" - register: osnuage_perm_task - failed_when: "'the object has been modified' not in osnuage_perm_task.stderr and osnuage_perm_task.rc != 0" - changed_when: osnuage_perm_task.rc == 0 + oc_adm_policy_user: + namespace: default + resource_name: "{{ item.resource_name }}" + resource_kind: "{{ item.resource_kind }}" + user: "{{ item.user }}" + with_items: "{{ nuage_tasks }}" run_once: True delegate_to: "{{ nuage_ca_master }}" @@ -52,10 +31,3 @@ --user={{ nuage_service_account }} delegate_to: "{{ nuage_ca_master }}" run_once: True - -- name: Clean temporary configuration file - command: > - rm -f {{nuage_tmp_conf}} - changed_when: false - delegate_to: "{{ nuage_ca_master }}" - run_once: True diff --git a/roles/nuage_master/vars/main.yaml b/roles/nuage_master/vars/main.yaml index 651d5775c..57d5d2595 100644 --- a/roles/nuage_master/vars/main.yaml +++ b/roles/nuage_master/vars/main.yaml @@ -23,4 +23,6 @@ nuage_master_crt_dir: /usr/share/nuage-openshift-monitor nuage_service_account: system:serviceaccount:default:nuage nuage_tasks: - - policy add-cluster-role-to-user cluster-reader {{ nuage_service_account }} +- resource_kind: cluster-role + resource_name: cluster-reader + user: "{{ nuage_service_account }}" diff --git a/roles/nuage_node/defaults/main.yml b/roles/nuage_node/defaults/main.yml new file mode 100644 index 000000000..b3d2e3cec --- /dev/null +++ b/roles/nuage_node/defaults/main.yml @@ -0,0 +1,12 @@ +--- +r_nuage_node_firewall_enabled: True +r_nuage_node_use_firewalld: False + +nuage_mon_rest_server_port: '9443' + +r_nuage_node_os_firewall_deny: [] +r_nuage_node_os_firewall_allow: +- service: vxlan + port: 4789/udp +- service: nuage-monitor + port: "{{ nuage_mon_rest_server_port }}/tcp" diff --git a/roles/nuage_node/meta/main.yml b/roles/nuage_node/meta/main.yml index 3e2a5e0c9..9b0315054 100644 --- a/roles/nuage_node/meta/main.yml +++ b/roles/nuage_node/meta/main.yml @@ -15,9 +15,4 @@ galaxy_info: dependencies: - role: nuage_common - role: nuage_ca -- role: os_firewall - os_firewall_allow: - - service: vxlan - port: 4789/udp - - service: nuage-monitor - port: "{{ nuage_mon_rest_server_port }}/tcp" +- role: lib_os_firewall diff --git a/roles/nuage_node/tasks/firewall.yml b/roles/nuage_node/tasks/firewall.yml new file mode 100644 index 000000000..baf600d57 --- /dev/null +++ b/roles/nuage_node/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_nuage_node_firewall_enabled | bool and not r_nuage_node_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_nuage_node_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_nuage_node_os_firewall_deny }}" + +- when: r_nuage_node_firewall_enabled | bool and r_nuage_node_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_nuage_node_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_nuage_node_os_firewall_deny }}" diff --git a/roles/nuage_node/tasks/main.yaml b/roles/nuage_node/tasks/main.yaml index 928f9e2e6..9cd743304 100644 --- a/roles/nuage_node/tasks/main.yaml +++ b/roles/nuage_node/tasks/main.yaml @@ -54,3 +54,7 @@ - restart node - include: iptables.yml + +- name: setup firewall + include: firewall.yml + static: yes diff --git a/roles/nuage_node/templates/vsp-openshift.j2 b/roles/nuage_node/templates/vsp-openshift.j2 index 9fab53906..f6bccebc2 100644 --- a/roles/nuage_node/templates/vsp-openshift.j2 +++ b/roles/nuage_node/templates/vsp-openshift.j2 @@ -9,7 +9,7 @@ enterpriseName: {{ enterprise }} # Name of the domain in which pods will reside domainName: {{ domain }} # Name of the VSD user in admin group -vsdUser: {{ vsduser }} +vsdUser: {{ vsd_user }} # IP address and port number of master API server masterApiServer: {{ api_server }} # REST server URL diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml index 79e59b410..27ed57703 100644 --- a/roles/openshift_cfme/defaults/main.yml +++ b/roles/openshift_cfme/defaults/main.yml @@ -35,9 +35,9 @@ openshift_cfme_nfs_server: "{{ groups.nfs.0 }}" # --template=manageiq). If False everything UP TO 'new-app' is ran. openshift_cfme_install_app: False # Docker image to pull -openshift_cfme_application_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-app' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}" -openshift_cfme_postgresql_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}" -openshift_cfme_memcached_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-memcached' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}" +openshift_cfme_application_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-app' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}" +openshift_cfme_postgresql_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}" +openshift_cfme_memcached_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-memcached' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}" openshift_cfme_application_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'app-latest-fine' }}" openshift_cfme_memcached_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'memcached-latest-fine' }}" openshift_cfme_postgresql_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'postgresql-latest-fine' }}" diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml index 476a5e030..7e90b09a4 100644 --- a/roles/openshift_cfme/handlers/main.yml +++ b/roles/openshift_cfme/handlers/main.yml @@ -6,19 +6,14 @@ # See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782 ###################################################################### -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and diff --git a/roles/openshift_common/meta/main.yml b/roles/openshift_common/meta/main.yml index cd8c75ec5..f1cf3e161 100644 --- a/roles/openshift_common/meta/main.yml +++ b/roles/openshift_common/meta/main.yml @@ -14,4 +14,3 @@ galaxy_info: dependencies: - role: openshift_facts - role: openshift_repos -- role: openshift_version diff --git a/roles/openshift_docker/meta/main.yml b/roles/openshift_docker/meta/main.yml index 10131f717..60efd4e45 100644 --- a/roles/openshift_docker/meta/main.yml +++ b/roles/openshift_docker/meta/main.yml @@ -12,6 +12,5 @@ galaxy_info: categories: - cloud dependencies: -- role: openshift_version - role: openshift_docker_facts - role: docker diff --git a/roles/openshift_facts/defaults/main.yml b/roles/openshift_facts/defaults/main.yml deleted file mode 100644 index cc4dc9365..000000000 --- a/roles/openshift_facts/defaults/main.yml +++ /dev/null @@ -1,2 +0,0 @@ ---- -openshift_use_system_containers: false diff --git a/roles/openshift_facts/meta/main.yml b/roles/openshift_facts/meta/main.yml index 7eead2d6e..0be3afd24 100644 --- a/roles/openshift_facts/meta/main.yml +++ b/roles/openshift_facts/meta/main.yml @@ -12,5 +12,4 @@ galaxy_info: categories: - cloud - system -dependencies: -- role: openshift_sanitize_inventory +dependencies: [] diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml deleted file mode 100644 index 4af02ab96..000000000 --- a/roles/openshift_facts/tasks/main.yml +++ /dev/null @@ -1,118 +0,0 @@ ---- -- name: Detecting Operating System - stat: - path: /run/ostree-booted - register: ostree_booted - -# Locally setup containerized facts for now -- set_fact: - l_is_atomic: "{{ ostree_booted.stat.exists }}" - l_use_crio: "{{ openshift_docker_use_crio | default(false) }}" -- set_fact: - l_is_containerized: "{{ (l_is_atomic | bool) or (containerized | default(false) | bool) }}" - l_is_openvswitch_system_container: "{{ (openshift_use_openvswitch_system_container | default(openshift_use_system_containers) | bool) }}" - l_is_node_system_container: "{{ (openshift_use_node_system_container | default(openshift_use_system_containers) | bool) }}" - l_is_master_system_container: "{{ (openshift_use_master_system_container | default(openshift_use_system_containers) | bool) }}" - l_is_etcd_system_container: "{{ (openshift_use_etcd_system_container | default(openshift_use_system_containers) | bool) }}" -- set_fact: - l_any_system_container: "{{ l_is_etcd_system_container or l_is_openvswitch_system_container or l_is_node_system_container or l_is_master_system_container }}" -- set_fact: - l_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else 'docker' if l_is_containerized else 'host' }}" - - -- name: Validate python version - fail: - msg: | - openshift-ansible requires Python 3 for {{ ansible_distribution }}; - For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html - when: - - ansible_distribution == 'Fedora' - - ansible_python['version']['major'] != 3 - - r_openshift_facts_ran is not defined - -- name: Validate python version - fail: - msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" - when: - - ansible_distribution != 'Fedora' - - ansible_python['version']['major'] != 2 - - r_openshift_facts_ran is not defined - -# Fail as early as possible if Atomic and old version of Docker -- block: - - # See https://access.redhat.com/articles/2317361 - # and https://github.com/ansible/ansible/issues/15892 - # NOTE: the "'s can not be removed at this level else the docker command will fail - # NOTE: When ansible >2.2.1.x is used this can be updated per - # https://github.com/openshift/openshift-ansible/pull/3475#discussion_r103525121 - - name: Determine Atomic Host Docker Version - shell: 'CURLY="{"; docker version --format "$CURLY{json .Server.Version}}"' - register: l_atomic_docker_version - - - assert: - msg: Installation on Atomic Host requires Docker 1.12 or later. Please upgrade and restart the Atomic Host. - that: - - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') - - when: - - not l_use_crio - - l_is_atomic | bool - - r_openshift_facts_ran is not defined - -- name: Load variables - include_vars: "{{ item }}" - with_first_found: - - "{{ ansible_distribution }}.yml" - - "default.yml" - -- name: Ensure various deps are installed - package: name={{ item }} state=present - with_items: "{{ required_packages }}" - when: - - not l_is_atomic | bool - - r_openshift_facts_ran is not defined - -- name: Ensure various deps for running system containers are installed - package: name={{ item }} state=present - with_items: "{{ required_system_containers_packages }}" - when: - - not l_is_atomic | bool - - l_any_system_container | bool - - r_openshift_facts_ran is not defined - -- name: Gather Cluster facts and set is_containerized if needed - openshift_facts: - role: common - local_facts: - debug_level: "{{ openshift_debug_level | default(2) }}" - deployment_type: "{{ openshift_deployment_type }}" - deployment_subtype: "{{ openshift_deployment_subtype | default(None) }}" - cluster_id: "{{ openshift_cluster_id | default('default') }}" - hostname: "{{ openshift_hostname | default(None) }}" - ip: "{{ openshift_ip | default(None) }}" - is_containerized: "{{ l_is_containerized | default(None) }}" - is_openvswitch_system_container: "{{ l_is_openvswitch_system_container | default(false) }}" - is_node_system_container: "{{ l_is_node_system_container | default(false) }}" - is_master_system_container: "{{ l_is_master_system_container | default(false) }}" - is_etcd_system_container: "{{ l_is_etcd_system_container | default(false) }}" - etcd_runtime: "{{ l_etcd_runtime }}" - system_images_registry: "{{ system_images_registry | default('') }}" - public_hostname: "{{ openshift_public_hostname | default(None) }}" - public_ip: "{{ openshift_public_ip | default(None) }}" - portal_net: "{{ openshift_portal_net | default(openshift_master_portal_net) | default(None) }}" - http_proxy: "{{ openshift_http_proxy | default(None) }}" - https_proxy: "{{ openshift_https_proxy | default(None) }}" - no_proxy: "{{ openshift_no_proxy | default(None) }}" - generate_no_proxy_hosts: "{{ openshift_generate_no_proxy_hosts | default(True) }}" - no_proxy_internal_hostnames: "{{ openshift_no_proxy_internal_hostnames | default(None) }}" - sdn_network_plugin_name: "{{ os_sdn_network_plugin_name | default(None) }}" - use_openshift_sdn: "{{ openshift_use_openshift_sdn | default(None) }}" - -- name: Set repoquery command - set_fact: - repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" - -- name: Register that this already ran - set_fact: - r_openshift_facts_ran: True diff --git a/roles/openshift_facts/vars/Fedora.yml b/roles/openshift_facts/vars/Fedora.yml deleted file mode 100644 index 745f5f398..000000000 --- a/roles/openshift_facts/vars/Fedora.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -required_packages: - - iproute - - python3-dbus - - PyYAML - - yum-utils diff --git a/roles/openshift_facts/vars/default.yml b/roles/openshift_facts/vars/default.yml deleted file mode 100644 index 3cd616d16..000000000 --- a/roles/openshift_facts/vars/default.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -required_packages: - - iproute - - python-dbus - - PyYAML - - yum-utils diff --git a/roles/openshift_facts/vars/main.yml b/roles/openshift_facts/vars/main.yml deleted file mode 100644 index 89d4034d3..000000000 --- a/roles/openshift_facts/vars/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -required_system_containers_packages: - - atomic - - ostree - - runc diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 23da53940..05e53333d 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -68,13 +68,15 @@ class ActionModule(ActionBase): msg=str(e), ) + if check.changed: + r["changed"] = True check_results[check_name] = r - if r.get("failed", False): - result["failed"] = True - result["msg"] = "One or more checks failed" + result["changed"] = any(r.get("changed") for r in check_results.values()) + if any(r.get("failed") for r in check_results.values()): + result["failed"] = True + result["msg"] = "One or more checks failed" - result["changed"] = any(r.get("changed", False) for r in check_results.values()) return result def load_known_checks(self, tmp, task_vars): diff --git a/roles/openshift_health_checker/meta/main.yml b/roles/openshift_health_checker/meta/main.yml index 4d141974c..cd9b55902 100644 --- a/roles/openshift_health_checker/meta/main.yml +++ b/roles/openshift_health_checker/meta/main.yml @@ -2,4 +2,3 @@ dependencies: - role: openshift_facts - role: openshift_repos - - role: openshift_version diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 85cbc6224..07ec6f7ef 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -10,11 +10,34 @@ from importlib import import_module from ansible.module_utils import six from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin +from ansible.plugins.filter.core import to_bool as ansible_to_bool class OpenShiftCheckException(Exception): - """Raised when a check cannot proceed.""" - pass + """Raised when a check encounters a failure condition.""" + + def __init__(self, name, msg=None): + # msg is for the message the user will see when this is raised. + # name is for test code to identify the error without looking at msg text. + if msg is None: # for parameter backward compatibility + msg = name + name = self.__class__.__name__ + self.name = name + super(OpenShiftCheckException, self).__init__(msg) + + +class OpenShiftCheckExceptionList(OpenShiftCheckException): + """A container for multiple logging errors that may be detected in one check.""" + def __init__(self, errors): + self.errors = errors + super(OpenShiftCheckExceptionList, self).__init__( + 'OpenShiftCheckExceptionList', + '\n'.join(str(msg) for msg in errors) + ) + + # make iterable + def __getitem__(self, index): + return self.errors[index] @six.add_metaclass(ABCMeta) @@ -35,6 +58,9 @@ class OpenShiftCheck(object): self.task_vars = task_vars or {} self.tmp = tmp + # set to True when the check changes the host, for accurate total "changed" count + self.changed = False + @abstractproperty def name(self): """The name of this check, usually derived from the class name.""" @@ -94,16 +120,59 @@ class OpenShiftCheck(object): Ansible task_vars structures are Python dicts, often mapping strings to other dicts. This helper makes it easier to get a nested value, raising - OpenShiftCheckException when a key is not found or returning a default value - provided as a keyword argument. + OpenShiftCheckException when a key is not found. + + Keyword args: + default: + On missing key, return this as default value instead of raising exception. + convert: + Supply a function to apply to normalize the value before returning it. + None is the default (return as-is). + This function should raise ValueError if the user has provided a value + that cannot be converted, or OpenShiftCheckException if some other + problem needs to be described to the user. """ + if len(keys) == 1: + keys = keys[0].split(".") + try: value = reduce(operator.getitem, keys, self.task_vars) except (KeyError, TypeError): - if "default" in kwargs: - return kwargs["default"] - raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys)))) - return value + if "default" not in kwargs: + raise OpenShiftCheckException( + "This check expects the '{}' inventory variable to be defined\n" + "in order to proceed, but it is undefined. There may be a bug\n" + "in Ansible, the checks, or their dependencies." + "".format(".".join(map(str, keys))) + ) + value = kwargs["default"] + + convert = kwargs.get("convert", None) + try: + if convert is None: + return value + elif convert is bool: # interpret bool as Ansible does, instead of python truthiness + return ansible_to_bool(value) + else: + return convert(value) + + except ValueError as error: # user error in specifying value + raise OpenShiftCheckException( + 'Cannot convert inventory variable to expected type:\n' + ' "{var}={value}"\n' + '{error}'.format(var=".".join(keys), value=value, error=error) + ) + + except OpenShiftCheckException: # some other check-specific problem + raise + + except Exception as error: # probably a bug in the function + raise OpenShiftCheckException( + 'There is a bug in this check. While trying to convert variable \n' + ' "{var}={value}"\n' + 'the given converter cannot be used or failed unexpectedly:\n' + '{error}'.format(var=".".join(keys), value=value, error=error) + ) @staticmethod def get_major_minor_version(openshift_image_tag): @@ -128,6 +197,31 @@ class OpenShiftCheck(object): components = tuple(int(x) for x in components[:2]) return components + def find_ansible_mount(self, path): + """Return the mount point for path from ansible_mounts.""" + + # reorganize list of mounts into dict by path + mount_for_path = { + mount['mount']: mount + for mount + in self.get_var('ansible_mounts') + } + + # NOTE: including base cases '/' and '' to ensure the loop ends + mount_targets = set(mount_for_path.keys()) | {'/', ''} + mount_point = path + while mount_point not in mount_targets: + mount_point = os.path.dirname(mount_point) + + try: + return mount_for_path[mount_point] + except KeyError: + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) + raise OpenShiftCheckException( + 'Unable to determine mount point for path "{}".\n' + 'Known mount points: {}.'.format(path, known_mounts or 'none') + ) + LOADER_EXCLUDES = ( "__init__.py", diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 39ac0e4ec..6d1dea9ce 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,6 +1,5 @@ """Check that there is enough disk space in predefined paths.""" -import os.path import tempfile from openshift_checks import OpenShiftCheck, OpenShiftCheckException @@ -55,9 +54,6 @@ class DiskAvailability(OpenShiftCheck): def run(self): group_names = self.get_var("group_names") - ansible_mounts = self.get_var("ansible_mounts") - ansible_mounts = {mount['mount']: mount for mount in ansible_mounts} - user_config = self.get_var("openshift_check_min_host_disk_gb", default={}) try: # For backwards-compatibility, if openshift_check_min_host_disk_gb @@ -80,7 +76,7 @@ class DiskAvailability(OpenShiftCheck): # not part of the official recommendation but present in the user # configuration. for path, recommendation in self.recommended_disk_space_bytes.items(): - free_bytes = self.free_bytes(path, ansible_mounts) + free_bytes = self.free_bytes(path) recommended_bytes = max(recommendation.get(name, 0) for name in group_names) config = user_config.get(path, {}) @@ -127,22 +123,17 @@ class DiskAvailability(OpenShiftCheck): return {} - @staticmethod - def free_bytes(path, ansible_mounts): + def free_bytes(self, path): """Return the size available in path based on ansible_mounts.""" - mount_point = path - # arbitry value to prevent an infinite loop, in the unlike case that '/' - # is not in ansible_mounts. - max_depth = 32 - while mount_point not in ansible_mounts and max_depth > 0: - mount_point = os.path.dirname(mount_point) - max_depth -= 1 - + mount = self.find_ansible_mount(path) try: - free_bytes = ansible_mounts[mount_point]['size_available'] + return mount['size_available'] except KeyError: - known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none' - msg = 'Unable to determine disk availability for "{}". Known mount points: {}.' - raise OpenShiftCheckException(msg.format(path, known_mounts)) - - return free_bytes + raise OpenShiftCheckException( + 'Unable to retrieve disk availability for "{path}".\n' + 'Ansible facts included a matching mount point for this path:\n' + ' {mount}\n' + 'however it is missing the size_available field.\n' + 'To investigate, you can inspect the output of `ansible -m setup <host>`' + ''.format(path=path, mount=mount) + ) diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 77180223e..85a922f86 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -41,11 +41,10 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type def run(self): - msg, failed, changed = self.ensure_dependencies() + msg, failed = self.ensure_dependencies() if failed: return { "failed": True, - "changed": changed, "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg } @@ -54,11 +53,11 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # exit early if all images were found locally if not missing_images: - return {"changed": changed} + return {} registries = self.known_docker_registries() if not registries: - return {"failed": True, "msg": "Unable to retrieve any docker registries.", "changed": changed} + return {"failed": True, "msg": "Unable to retrieve any docker registries."} available_images = self.available_images(missing_images, registries) unavailable_images = set(missing_images) - set(available_images) @@ -70,10 +69,9 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): "One or more required Docker images are not available:\n {}\n" "Configured registries: {}" ).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries)), - "changed": changed, } - return {"changed": changed} + return {} def required_images(self): """ diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index dea15a56e..0558ddf14 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -1,6 +1,5 @@ """Check Docker storage driver and usage.""" import json -import os.path import re from openshift_checks import OpenShiftCheck, OpenShiftCheckException from openshift_checks.mixins import DockerHostMixin @@ -43,21 +42,20 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): ] def run(self): - msg, failed, changed = self.ensure_dependencies() + msg, failed = self.ensure_dependencies() if failed: return { "failed": True, - "changed": changed, "msg": "Some dependencies are required in order to query docker storage on host:\n" + msg } # attempt to get the docker info hash from the API docker_info = self.execute_module("docker_info", {}) if docker_info.get("failed"): - return {"failed": True, "changed": changed, + return {"failed": True, "msg": "Failed to query Docker API. Is docker running on this host?"} if not docker_info.get("info"): # this would be very strange - return {"failed": True, "changed": changed, + return {"failed": True, "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))} docker_info = docker_info["info"] @@ -68,7 +66,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "Detected unsupported Docker storage driver '{driver}'.\n" "Supported storage drivers are: {drivers}" ).format(driver=driver, drivers=', '.join(self.storage_drivers)) - return {"failed": True, "changed": changed, "msg": msg} + return {"failed": True, "msg": msg} # driver status info is a list of tuples; convert to dict and validate based on driver driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])} @@ -81,7 +79,6 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): if driver in ['overlay', 'overlay2']: result = self.check_overlay_support(docker_info, driver_status) - result['changed'] = result.get('changed', False) or changed return result def check_devicemapper_support(self, driver_status): @@ -254,7 +251,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold), } - mount = self.find_ansible_mount(path, self.get_var("ansible_mounts")) + mount = self.find_ansible_mount(path) try: free_bytes = mount['size_available'] total_bytes = mount['size_total'] @@ -277,22 +274,3 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } return {} - - # TODO(lmeyer): migrate to base class - @staticmethod - def find_ansible_mount(path, ansible_mounts): - """Return the mount point for path from ansible_mounts.""" - - mount_for_path = {mount['mount']: mount for mount in ansible_mounts} - mount_point = path - while mount_point not in mount_for_path: - if mount_point in ["/", ""]: # "/" not in ansible_mounts??? - break - mount_point = os.path.dirname(mount_point) - - try: - return mount_for_path[mount_point] - except KeyError: - known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none' - msg = 'Unable to determine mount point for path "{}". Known mount points: {}.' - raise OpenShiftCheckException(msg.format(path, known_mounts)) diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py index 28c38504d..f4296753a 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py @@ -2,7 +2,7 @@ Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster. """ -from openshift_checks import OpenShiftCheck, OpenShiftCheckException +from openshift_checks import OpenShiftCheck class EtcdImageDataSize(OpenShiftCheck): @@ -12,7 +12,7 @@ class EtcdImageDataSize(OpenShiftCheck): tags = ["etcd"] def run(self): - etcd_mountpath = self._get_etcd_mountpath(self.get_var("ansible_mounts")) + etcd_mountpath = self.find_ansible_mount("/var/lib/etcd") etcd_avail_diskspace = etcd_mountpath["size_available"] etcd_total_diskspace = etcd_mountpath["size_total"] @@ -56,7 +56,7 @@ class EtcdImageDataSize(OpenShiftCheck): reason = etcdkeysize["module_stderr"] msg = msg.format(host=etcd_host, reason=reason) - return {"failed": True, "changed": False, "msg": msg} + return {"failed": True, "msg": msg} if etcdkeysize["size_limit_exceeded"]: limit = self._to_gigabytes(etcd_imagedata_size_limit) @@ -65,20 +65,7 @@ class EtcdImageDataSize(OpenShiftCheck): "Use the `oadm prune images` command to cleanup unused Docker images.") return {"failed": True, "msg": msg.format(host=etcd_host, limit=limit)} - return {"changed": False} - - @staticmethod - def _get_etcd_mountpath(ansible_mounts): - valid_etcd_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] - - mount_for_path = {mnt.get("mount"): mnt for mnt in ansible_mounts} - for path in valid_etcd_mount_paths: - if path in mount_for_path: - return mount_for_path[path] - - paths = ', '.join(sorted(mount_for_path)) or 'none' - msg = "Unable to determine a valid etcd mountpath. Paths mounted: {}.".format(paths) - raise OpenShiftCheckException(msg) + return {} @staticmethod def _to_gigabytes(byte_size): diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index da7d0364a..e5d93ff3f 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -1,6 +1,6 @@ """A health check for OpenShift clusters.""" -from openshift_checks import OpenShiftCheck, OpenShiftCheckException +from openshift_checks import OpenShiftCheck class EtcdVolume(OpenShiftCheck): @@ -11,8 +11,8 @@ class EtcdVolume(OpenShiftCheck): # Default device usage threshold. Value should be in the range [0, 100]. default_threshold_percent = 90 - # Where to find ectd data, higher priority first. - supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] + # Where to find etcd data + etcd_mount_path = "/var/lib/etcd" def is_active(self): etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] @@ -20,7 +20,7 @@ class EtcdVolume(OpenShiftCheck): return super(EtcdVolume, self).is_active() and is_etcd_host def run(self): - mount_info = self._etcd_mount_info() + mount_info = self.find_ansible_mount(self.etcd_mount_path) available = mount_info["size_available"] total = mount_info["size_total"] used = total - available @@ -40,16 +40,4 @@ class EtcdVolume(OpenShiftCheck): ) return {"failed": True, "msg": msg} - return {"changed": False} - - def _etcd_mount_info(self): - ansible_mounts = self.get_var("ansible_mounts") - mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts} - - for path in self.supported_mount_paths: - if path in mounts: - return mounts[path] - - paths = ', '.join(sorted(mounts)) or 'none' - msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths) - raise OpenShiftCheckException(msg) + return {} diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py index 32d853d57..b27f97172 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/curator.py +++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py @@ -1,6 +1,6 @@ """Check for an aggregated logging Curator deployment""" -from openshift_checks.logging.logging import LoggingCheck +from openshift_checks.logging.logging import OpenShiftCheckException, LoggingCheck class Curator(LoggingCheck): @@ -12,27 +12,17 @@ class Curator(LoggingCheck): def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - curator_pods, error = self.get_pods_for_component( - self.logging_namespace, - "curator", - ) - if error: - return {"failed": True, "changed": False, "msg": error} - check_error = self.check_curator(curator_pods) - - if check_error: - msg = ("The following Curator deployment issue was found:" - "{}".format(check_error)) - return {"failed": True, "changed": False, "msg": msg} - + curator_pods = self.get_pods_for_component("curator") + self.check_curator(curator_pods) # TODO(lmeyer): run it all again for the ops cluster - return {"failed": False, "changed": False, "msg": 'No problems found with Curator deployment.'} + + return {} def check_curator(self, pods): """Check to see if curator is up and working. Returns: error string""" if not pods: - return ( + raise OpenShiftCheckException( + "MissingComponentPods", "There are no Curator pods for the logging stack,\n" "so nothing will prune Elasticsearch indexes.\n" "Is Curator correctly deployed?" @@ -40,14 +30,14 @@ class Curator(LoggingCheck): not_running = self.not_running_pods(pods) if len(not_running) == len(pods): - return ( + raise OpenShiftCheckException( + "CuratorNotRunning", "The Curator pod is not currently in a running state,\n" "so Elasticsearch indexes may increase without bound." ) if len(pods) - len(not_running) > 1: - return ( + raise OpenShiftCheckException( + "TooManyCurators", "There is more than one Curator pod running. This should not normally happen.\n" "Although this doesn't cause any problems, you may want to investigate." ) - - return None diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 8bdda1f32..7fc843fd7 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -3,6 +3,7 @@ import json import re +from openshift_checks import OpenShiftCheckException, OpenShiftCheckExceptionList from openshift_checks.logging.logging import LoggingCheck @@ -15,168 +16,178 @@ class Elasticsearch(LoggingCheck): def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - es_pods, error = self.get_pods_for_component( - self.logging_namespace, - "es", - ) - if error: - return {"failed": True, "changed": False, "msg": error} - check_error = self.check_elasticsearch(es_pods) - - if check_error: - msg = ("The following Elasticsearch deployment issue was found:" - "{}".format(check_error)) - return {"failed": True, "changed": False, "msg": msg} - + es_pods = self.get_pods_for_component("es") + self.check_elasticsearch(es_pods) # TODO(lmeyer): run it all again for the ops cluster - return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'} - def _not_running_elasticsearch_pods(self, es_pods): - """Returns: list of pods that are not running, list of errors about non-running pods""" - not_running = self.not_running_pods(es_pods) - if not_running: - return not_running, [( - 'The following Elasticsearch pods are not running:\n' - '{pods}' - 'These pods will not aggregate logs from their nodes.' - ).format(pods=''.join( - " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None')) - for pod in not_running - ))] - return not_running, [] + return {} def check_elasticsearch(self, es_pods): - """Various checks for elasticsearch. Returns: error string""" - not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods) - running_pods = [pod for pod in es_pods if pod not in not_running_pods] + """Perform checks for Elasticsearch. Raises OpenShiftCheckExceptionList on any errors.""" + running_pods, errors = self.running_elasticsearch_pods(es_pods) pods_by_name = { pod['metadata']['name']: pod for pod in running_pods # Filter out pods that are not members of a DC if pod['metadata'].get('labels', {}).get('deploymentconfig') } if not pods_by_name: - return 'No logging Elasticsearch pods were found. Is logging deployed?' - error_msgs += self._check_elasticsearch_masters(pods_by_name) - error_msgs += self._check_elasticsearch_node_list(pods_by_name) - error_msgs += self._check_es_cluster_health(pods_by_name) - error_msgs += self._check_elasticsearch_diskspace(pods_by_name) - return '\n'.join(error_msgs) + # nothing running, cannot run the rest of the check + errors.append(OpenShiftCheckException( + 'NoRunningPods', + 'No logging Elasticsearch pods were found running, so no logs are being aggregated.' + )) + raise OpenShiftCheckExceptionList(errors) + + errors += self.check_elasticsearch_masters(pods_by_name) + errors += self.check_elasticsearch_node_list(pods_by_name) + errors += self.check_es_cluster_health(pods_by_name) + errors += self.check_elasticsearch_diskspace(pods_by_name) + if errors: + raise OpenShiftCheckExceptionList(errors) + + def running_elasticsearch_pods(self, es_pods): + """Returns: list of running pods, list of errors about non-running pods""" + not_running = self.not_running_pods(es_pods) + running_pods = [pod for pod in es_pods if pod not in not_running] + if not_running: + return running_pods, [OpenShiftCheckException( + 'PodNotRunning', + 'The following Elasticsearch pods are defined but not running:\n' + '{pods}'.format(pods=''.join( + " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None')) + for pod in not_running + )) + )] + return running_pods, [] @staticmethod def _build_es_curl_cmd(pod_name, url): base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'" return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url) - def _check_elasticsearch_masters(self, pods_by_name): - """Check that Elasticsearch masters are sane. Returns: list of error strings""" + def check_elasticsearch_masters(self, pods_by_name): + """Check that Elasticsearch masters are sane. Returns: list of errors""" es_master_names = set() - error_msgs = [] + errors = [] for pod_name in pods_by_name.keys(): # Compare what each ES node reports as master and compare for split brain get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master") - master_name_str = self.exec_oc(self.logging_namespace, get_master_cmd, []) + master_name_str = self.exec_oc(get_master_cmd, []) master_names = (master_name_str or '').split(' ') if len(master_names) > 1: es_master_names.add(master_names[1]) else: - error_msgs.append( - 'No master? Elasticsearch {pod} returned bad string when asked master name:\n' + errors.append(OpenShiftCheckException( + 'NoMasterName', + 'Elasticsearch {pod} gave unexpected response when asked master name:\n' ' {response}'.format(pod=pod_name, response=master_name_str) - ) + )) if not es_master_names: - error_msgs.append('No logging Elasticsearch masters were found. Is logging deployed?') - return '\n'.join(error_msgs) + errors.append(OpenShiftCheckException( + 'NoMasterFound', + 'No logging Elasticsearch masters were found.' + )) + return errors if len(es_master_names) > 1: - error_msgs.append( + errors.append(OpenShiftCheckException( + 'SplitBrainMasters', 'Found multiple Elasticsearch masters according to the pods:\n' '{master_list}\n' 'This implies that the masters have "split brain" and are not correctly\n' 'replicating data for the logging cluster. Log loss is likely to occur.' .format(master_list='\n'.join(' ' + master for master in es_master_names)) - ) + )) - return error_msgs + return errors - def _check_elasticsearch_node_list(self, pods_by_name): - """Check that reported ES masters are accounted for by pods. Returns: list of error strings""" + def check_elasticsearch_node_list(self, pods_by_name): + """Check that reported ES masters are accounted for by pods. Returns: list of errors""" if not pods_by_name: - return ['No logging Elasticsearch masters were found. Is logging deployed?'] + return [OpenShiftCheckException( + 'MissingComponentPods', + 'No logging Elasticsearch pods were found.' + )] # get ES cluster nodes node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes') - cluster_node_data = self.exec_oc(self.logging_namespace, node_cmd, []) + cluster_node_data = self.exec_oc(node_cmd, []) try: cluster_nodes = json.loads(cluster_node_data)['nodes'] except (ValueError, KeyError): - return [ + return [OpenShiftCheckException( + 'MissingNodeList', 'Failed to query Elasticsearch for the list of ES nodes. The output was:\n' + cluster_node_data - ] + )] # Try to match all ES-reported node hosts to known pods. - error_msgs = [] + errors = [] for node in cluster_nodes.values(): # Note that with 1.4/3.4 the pod IP may be used as the master name if not any(node['host'] in (pod_name, pod['status'].get('podIP')) for pod_name, pod in pods_by_name.items()): - error_msgs.append( + errors.append(OpenShiftCheckException( + 'EsPodNodeMismatch', 'The Elasticsearch cluster reports a member node "{node}"\n' 'that does not correspond to any known ES pod.'.format(node=node['host']) - ) + )) - return error_msgs + return errors - def _check_es_cluster_health(self, pods_by_name): + def check_es_cluster_health(self, pods_by_name): """Exec into the elasticsearch pods and check the cluster health. Returns: list of errors""" - error_msgs = [] + errors = [] for pod_name in pods_by_name.keys(): cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true') - cluster_health_data = self.exec_oc(self.logging_namespace, cluster_health_cmd, []) + cluster_health_data = self.exec_oc(cluster_health_cmd, []) try: health_res = json.loads(cluster_health_data) if not health_res or not health_res.get('status'): raise ValueError() except ValueError: - error_msgs.append( + errors.append(OpenShiftCheckException( + 'BadEsResponse', 'Could not retrieve cluster health status from logging ES pod "{pod}".\n' 'Response was:\n{output}'.format(pod=pod_name, output=cluster_health_data) - ) + )) continue if health_res['status'] not in ['green', 'yellow']: - error_msgs.append( + errors.append(OpenShiftCheckException( + 'EsClusterHealthRed', 'Elasticsearch cluster health status is RED according to pod "{}"'.format(pod_name) - ) + )) - return error_msgs + return errors - def _check_elasticsearch_diskspace(self, pods_by_name): + def check_elasticsearch_diskspace(self, pods_by_name): """ Exec into an ES pod and query the diskspace on the persistent volume. Returns: list of errors """ - error_msgs = [] + errors = [] for pod_name in pods_by_name.keys(): df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) - disk_output = self.exec_oc(self.logging_namespace, df_cmd, []) + disk_output = self.exec_oc(df_cmd, []) lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$' if len(lines) != 2 or len(lines[0].split()) != 2 or not re.match(body_re, lines[1]): - error_msgs.append( + errors.append(OpenShiftCheckException( + 'BadDfResponse', 'Could not retrieve storage usage from logging ES pod "{pod}".\n' 'Response to `df` command was:\n{output}'.format(pod=pod_name, output=disk_output) - ) + )) continue inode_pct, disk_pct = re.match(body_re, lines[1]).groups() inode_pct_thresh = self.get_var('openshift_check_efk_es_inode_pct', default='90') if int(inode_pct) >= int(inode_pct_thresh): - error_msgs.append( + errors.append(OpenShiftCheckException( + 'InodeUsageTooHigh', 'Inode percent usage on the storage volume for logging ES pod "{pod}"\n' ' is {pct}, greater than threshold {limit}.\n' ' Note: threshold can be specified in inventory with {param}'.format( @@ -184,10 +195,11 @@ class Elasticsearch(LoggingCheck): pct=str(inode_pct), limit=str(inode_pct_thresh), param='openshift_check_efk_es_inode_pct', - )) + ))) disk_pct_thresh = self.get_var('openshift_check_efk_es_storage_pct', default='80') if int(disk_pct) >= int(disk_pct_thresh): - error_msgs.append( + errors.append(OpenShiftCheckException( + 'DiskUsageTooHigh', 'Disk percent usage on the storage volume for logging ES pod "{pod}"\n' ' is {pct}, greater than threshold {limit}.\n' ' Note: threshold can be specified in inventory with {param}'.format( @@ -195,6 +207,6 @@ class Elasticsearch(LoggingCheck): pct=str(disk_pct), limit=str(disk_pct_thresh), param='openshift_check_efk_es_storage_pct', - )) + ))) - return error_msgs + return errors diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py index b3485bf44..3b192a281 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py @@ -2,6 +2,8 @@ import json + +from openshift_checks import OpenShiftCheckException, OpenShiftCheckExceptionList from openshift_checks.logging.logging import LoggingCheck @@ -12,57 +14,96 @@ class Fluentd(LoggingCheck): tags = ["health", "logging"] def run(self): - """Check various things and gather errors. Returns: result as hash""" + """Check the Fluentd deployment and raise an error if any problems are found.""" + + fluentd_pods = self.get_pods_for_component("fluentd") + self.check_fluentd(fluentd_pods) + return {} + + def check_fluentd(self, pods): + """Verify fluentd is running everywhere. Raises OpenShiftCheckExceptionList if error(s) found.""" - self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - fluentd_pods, error = super(Fluentd, self).get_pods_for_component( - self.logging_namespace, - "fluentd", + node_selector = self.get_var( + 'openshift_logging_fluentd_nodeselector', + default='logging-infra-fluentd=true' ) - if error: - return {"failed": True, "changed": False, "msg": error} - check_error = self.check_fluentd(fluentd_pods) - if check_error: - msg = ("The following Fluentd deployment issue was found:" - "{}".format(check_error)) - return {"failed": True, "changed": False, "msg": msg} + nodes_by_name = self.get_nodes_by_name() + fluentd_nodes = self.filter_fluentd_labeled_nodes(nodes_by_name, node_selector) - # TODO(lmeyer): run it all again for the ops cluster - return {"failed": False, "changed": False, "msg": 'No problems found with Fluentd deployment.'} + errors = [] + errors += self.check_node_labeling(nodes_by_name, fluentd_nodes, node_selector) + errors += self.check_nodes_have_fluentd(pods, fluentd_nodes) + errors += self.check_fluentd_pods_running(pods) + + # Make sure there are no extra fluentd pods + if len(pods) > len(fluentd_nodes): + errors.append(OpenShiftCheckException( + 'TooManyFluentdPods', + 'There are more Fluentd pods running than nodes labeled.\n' + 'This may not cause problems with logging but it likely indicates something wrong.' + )) + + if errors: + raise OpenShiftCheckExceptionList(errors) + + def get_nodes_by_name(self): + """Retrieve all the node definitions. Returns: dict(name: node)""" + nodes_json = self.exec_oc("get nodes -o json", []) + try: + nodes = json.loads(nodes_json) + except ValueError: # no valid json - should not happen + raise OpenShiftCheckException( + "BadOcNodeList", + "Could not obtain a list of nodes to validate fluentd.\n" + "Output from oc get:\n" + nodes_json + ) + if not nodes or not nodes.get('items'): # also should not happen + raise OpenShiftCheckException( + "NoNodesDefined", + "No nodes appear to be defined according to the API." + ) + return { + node['metadata']['name']: node + for node in nodes['items'] + } @staticmethod - def _filter_fluentd_labeled_nodes(nodes_by_name, node_selector): - """Filter to all nodes with fluentd label. Returns dict(name: node), error string""" + def filter_fluentd_labeled_nodes(nodes_by_name, node_selector): + """Filter to all nodes with fluentd label. Returns dict(name: node)""" label, value = node_selector.split('=', 1) fluentd_nodes = { name: node for name, node in nodes_by_name.items() if node['metadata']['labels'].get(label) == value } if not fluentd_nodes: - return None, ( + raise OpenShiftCheckException( + 'NoNodesLabeled', 'There are no nodes with the fluentd label {label}.\n' - 'This means no logs will be aggregated from the nodes.' - ).format(label=node_selector) - return fluentd_nodes, None + 'This means no logs will be aggregated from the nodes.'.format(label=node_selector) + ) + return fluentd_nodes - def _check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector): - """Note if nodes are not labeled as expected. Returns: error string""" + def check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector): + """Note if nodes are not labeled as expected. Returns: error list""" intended_nodes = self.get_var('openshift_logging_fluentd_hosts', default=['--all']) if not intended_nodes or '--all' in intended_nodes: intended_nodes = nodes_by_name.keys() nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys()) if nodes_missing_labels: - return ( + return [OpenShiftCheckException( + 'NodesUnlabeled', 'The following nodes are supposed to be labeled with {label} but are not:\n' ' {nodes}\n' - 'Fluentd will not aggregate logs from these nodes.' - ).format(label=node_selector, nodes=', '.join(nodes_missing_labels)) - return None + 'Fluentd will not aggregate logs from these nodes.'.format( + label=node_selector, nodes=', '.join(nodes_missing_labels) + ))] + + return [] @staticmethod - def _check_nodes_have_fluentd(pods, fluentd_nodes): - """Make sure fluentd is on all the labeled nodes. Returns: error string""" + def check_nodes_have_fluentd(pods, fluentd_nodes): + """Make sure fluentd is on all the labeled nodes. Returns: error list""" unmatched_nodes = fluentd_nodes.copy() node_names_by_label = { node['metadata']['labels']['kubernetes.io/hostname']: name @@ -82,80 +123,32 @@ class Fluentd(LoggingCheck): ]: unmatched_nodes.pop(name, None) if unmatched_nodes: - return ( + return [OpenShiftCheckException( + 'MissingFluentdPod', 'The following nodes are supposed to have a Fluentd pod but do not:\n' - '{nodes}' - 'These nodes will not have their logs aggregated.' - ).format(nodes=''.join( - " {}\n".format(name) - for name in unmatched_nodes.keys() - )) - return None + ' {nodes}\n' + 'These nodes will not have their logs aggregated.'.format( + nodes='\n '.join(unmatched_nodes.keys()) + ))] + + return [] - def _check_fluentd_pods_running(self, pods): + def check_fluentd_pods_running(self, pods): """Make sure all fluentd pods are running. Returns: error string""" not_running = super(Fluentd, self).not_running_pods(pods) if not_running: - return ( + return [OpenShiftCheckException( + 'FluentdNotRunning', 'The following Fluentd pods are supposed to be running but are not:\n' - '{pods}' - 'These pods will not aggregate logs from their nodes.' - ).format(pods=''.join( - " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None')) - for pod in not_running - )) - return None - - def check_fluentd(self, pods): - """Verify fluentd is running everywhere. Returns: error string""" - - node_selector = self.get_var( - 'openshift_logging_fluentd_nodeselector', - default='logging-infra-fluentd=true' - ) - - nodes_by_name, error = self.get_nodes_by_name() - - if error: - return error - fluentd_nodes, error = self._filter_fluentd_labeled_nodes(nodes_by_name, node_selector) - if error: - return error - - error_msgs = [] - error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector) - if error: - error_msgs.append(error) - error = self._check_nodes_have_fluentd(pods, fluentd_nodes) - if error: - error_msgs.append(error) - error = self._check_fluentd_pods_running(pods) - if error: - error_msgs.append(error) - - # Make sure there are no extra fluentd pods - if len(pods) > len(fluentd_nodes): - error_msgs.append( - 'There are more Fluentd pods running than nodes labeled.\n' - 'This may not cause problems with logging but it likely indicates something wrong.' - ) - - return '\n'.join(error_msgs) - - def get_nodes_by_name(self): - """Retrieve all the node definitions. Returns: dict(name: node), error string""" - nodes_json = self.exec_oc( - self.logging_namespace, - "get nodes -o json", - [] - ) - try: - nodes = json.loads(nodes_json) - except ValueError: # no valid json - should not happen - return None, "Could not obtain a list of nodes to validate fluentd. Output from oc get:\n" + nodes_json - if not nodes or not nodes.get('items'): # also should not happen - return None, "No nodes appear to be defined according to the API." - return { - node['metadata']['name']: node - for node in nodes['items'] - }, None + ' {pods}\n' + 'These pods will not aggregate logs from their nodes.'.format( + pods='\n'.join( + " {name} ({host})".format( + name=pod['metadata']['name'], + host=pod['spec'].get('host', 'None') + ) + for pod in not_running + ) + ))] + + return [] diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py index 0970f0a63..d783e6760 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py @@ -24,7 +24,6 @@ class FluentdConfig(LoggingCheck): def run(self): """Check that Fluentd has running pods, and that its logging config matches Docker's logging config.""" - self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace) config_error = self.check_logging_config() if config_error: msg = ("The following Fluentd logging configuration problem was found:" @@ -120,19 +119,13 @@ class FluentdConfig(LoggingCheck): def running_fluentd_pods(self): """Return a list of running fluentd pods.""" - fluentd_pods, error = self.get_pods_for_component( - self.logging_namespace, - "fluentd", - ) - if error: - msg = 'Unable to retrieve any pods for the "fluentd" logging component: {}'.format(error) - raise OpenShiftCheckException(msg) + fluentd_pods = self.get_pods_for_component("fluentd") running_fluentd_pods = [pod for pod in fluentd_pods if pod['status']['phase'] == 'Running'] if not running_fluentd_pods: - msg = ('No Fluentd pods were found to be in the "Running" state. ' - 'At least one Fluentd pod is required in order to perform this check.') - - raise OpenShiftCheckException(msg) + raise OpenShiftCheckException( + 'No Fluentd pods were found to be in the "Running" state. ' + 'At least one Fluentd pod is required in order to perform this check.' + ) return running_fluentd_pods diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index efb14ab42..3b1cf8baa 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -12,7 +12,7 @@ except ImportError: from urllib.error import HTTPError, URLError import urllib.request as urllib2 -from openshift_checks.logging.logging import LoggingCheck +from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException class Kibana(LoggingCheck): @@ -24,25 +24,12 @@ class Kibana(LoggingCheck): def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - kibana_pods, error = self.get_pods_for_component( - self.logging_namespace, - "kibana", - ) - if error: - return {"failed": True, "changed": False, "msg": error} - check_error = self.check_kibana(kibana_pods) - - if not check_error: - check_error = self._check_kibana_route() - - if check_error: - msg = ("The following Kibana deployment issue was found:" - "{}".format(check_error)) - return {"failed": True, "changed": False, "msg": msg} - + kibana_pods = self.get_pods_for_component("kibana") + self.check_kibana(kibana_pods) + self.check_kibana_route() # TODO(lmeyer): run it all again for the ops cluster - return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'} + + return {} def _verify_url_internal(self, url): """ @@ -65,7 +52,7 @@ class Kibana(LoggingCheck): def _verify_url_external(url): """ Try to reach a URL from ansible control host. - Returns: success (bool), reason (for failure) + Raise an OpenShiftCheckException if anything goes wrong. """ # This actually checks from the ansible control host, which may or may not # really be "external" to the cluster. @@ -91,130 +78,149 @@ class Kibana(LoggingCheck): return None def check_kibana(self, pods): - """Check to see if Kibana is up and working. Returns: error string.""" + """Check to see if Kibana is up and working. Raises OpenShiftCheckException if not.""" if not pods: - return "There are no Kibana pods deployed, so no access to the logging UI." + raise OpenShiftCheckException( + "MissingComponentPods", + "There are no Kibana pods deployed, so no access to the logging UI." + ) not_running = self.not_running_pods(pods) if len(not_running) == len(pods): - return "No Kibana pod is in a running state, so there is no access to the logging UI." + raise OpenShiftCheckException( + "NoRunningPods", + "No Kibana pod is in a running state, so there is no access to the logging UI." + ) elif not_running: - return ( + raise OpenShiftCheckException( + "PodNotRunning", "The following Kibana pods are not currently in a running state:\n" - "{pods}" - "However at least one is, so service may not be impacted." - ).format(pods="".join(" " + pod['metadata']['name'] + "\n" for pod in not_running)) - - return None + " {pods}\n" + "However at least one is, so service may not be impacted.".format( + pods="\n ".join(pod['metadata']['name'] for pod in not_running) + ) + ) def _get_kibana_url(self): """ Get kibana route or report error. - Returns: url (or empty), reason for failure + Returns: url """ # Get logging url - get_route = self.exec_oc( - self.logging_namespace, - "get route logging-kibana -o json", - [], - ) + get_route = self.exec_oc("get route logging-kibana -o json", []) if not get_route: - return None, 'no_route_exists' + raise OpenShiftCheckException( + 'no_route_exists', + 'No route is defined for Kibana in the logging namespace,\n' + 'so the logging stack is not accessible. Is logging deployed?\n' + 'Did something remove the logging-kibana route?' + ) - route = json.loads(get_route) + try: + route = json.loads(get_route) + # check that the route has been accepted by a router + ingress = route["status"]["ingress"] + except (ValueError, KeyError): + raise OpenShiftCheckException( + 'get_route_failed', + '"oc get route" returned an unexpected response:\n' + get_route + ) - # check that the route has been accepted by a router - ingress = route["status"]["ingress"] # ingress can be null if there is no router, or empty if not routed if not ingress or not ingress[0]: - return None, 'route_not_accepted' + raise OpenShiftCheckException( + 'route_not_accepted', + 'The logging-kibana route is not being routed by any router.\n' + 'Is the router deployed and working?' + ) host = route.get("spec", {}).get("host") if not host: - return None, 'route_missing_host' + raise OpenShiftCheckException( + 'route_missing_host', + 'The logging-kibana route has no hostname defined,\n' + 'which should never happen. Did something alter its definition?' + ) - return 'https://{}/'.format(host), None + return 'https://{}/'.format(host) - def _check_kibana_route(self): + def check_kibana_route(self): """ Check to see if kibana route is up and working. - Returns: error string + Raises exception if not. """ - known_errors = dict( - no_route_exists=( - 'No route is defined for Kibana in the logging namespace,\n' - 'so the logging stack is not accessible. Is logging deployed?\n' - 'Did something remove the logging-kibana route?' - ), - route_not_accepted=( - 'The logging-kibana route is not being routed by any router.\n' - 'Is the router deployed and working?' - ), - route_missing_host=( - 'The logging-kibana route has no hostname defined,\n' - 'which should never happen. Did something alter its definition?' - ), - ) - kibana_url, error = self._get_kibana_url() - if not kibana_url: - return known_errors.get(error, error) + kibana_url = self._get_kibana_url() # first, check that kibana is reachable from the master. error = self._verify_url_internal(kibana_url) if error: if 'urlopen error [Errno 111] Connection refused' in error: - error = ( + raise OpenShiftCheckException( + 'FailedToConnectInternal', 'Failed to connect from this master to Kibana URL {url}\n' - 'Is kibana running, and is at least one router routing to it?' - ).format(url=kibana_url) + 'Is kibana running, and is at least one router routing to it?'.format(url=kibana_url) + ) elif 'urlopen error [Errno -2] Name or service not known' in error: - error = ( + raise OpenShiftCheckException( + 'FailedToResolveInternal', 'Failed to connect from this master to Kibana URL {url}\n' 'because the hostname does not resolve.\n' - 'Is DNS configured for the Kibana hostname?' - ).format(url=kibana_url) + 'Is DNS configured for the Kibana hostname?'.format(url=kibana_url) + ) elif 'Status code was not' in error: - error = ( + raise OpenShiftCheckException( + 'WrongReturnCodeInternal', 'A request from this master to the Kibana URL {url}\n' 'did not return the correct status code (302).\n' 'This could mean that Kibana is malfunctioning, the hostname is\n' 'resolving incorrectly, or other network issues. The output was:\n' - ' {error}' - ).format(url=kibana_url, error=error) - return 'Error validating the logging Kibana route:\n' + error + ' {error}'.format(url=kibana_url, error=error) + ) + raise OpenShiftCheckException( + 'MiscRouteErrorInternal', + 'Error validating the logging Kibana route internally:\n' + error + ) # in production we would like the kibana route to work from outside the # cluster too; but that may not be the case, so allow disabling just this part. - if not self.get_var("openshift_check_efk_kibana_external", default=True): - return None + if self.get_var("openshift_check_efk_kibana_external", default="True").lower() != "true": + return error = self._verify_url_external(kibana_url) - if error: - if 'urlopen error [Errno 111] Connection refused' in error: - error = ( - 'Failed to connect from the Ansible control host to Kibana URL {url}\n' - 'Is the router for the Kibana hostname exposed externally?' - ).format(url=kibana_url) - elif 'urlopen error [Errno -2] Name or service not known' in error: - error = ( - 'Failed to resolve the Kibana hostname in {url}\n' - 'from the Ansible control host.\n' - 'Is DNS configured to resolve this Kibana hostname externally?' - ).format(url=kibana_url) - elif 'Expected success (200)' in error: - error = ( - 'A request to Kibana at {url}\n' - 'returned the wrong error code:\n' - ' {error}\n' - 'This could mean that Kibana is malfunctioning, the hostname is\n' - 'resolving incorrectly, or other network issues.' - ).format(url=kibana_url, error=error) - error = ( - 'Error validating the logging Kibana route:\n{error}\n' - 'To disable external Kibana route validation, set in your inventory:\n' - ' openshift_check_efk_kibana_external=False' - ).format(error=error) - return error - return None + + if not error: + return + + error_fmt = ( + 'Error validating the logging Kibana route:\n{error}\n' + 'To disable external Kibana route validation, set the variable:\n' + ' openshift_check_efk_kibana_external=False' + ) + if 'urlopen error [Errno 111] Connection refused' in error: + msg = ( + 'Failed to connect from the Ansible control host to Kibana URL {url}\n' + 'Is the router for the Kibana hostname exposed externally?' + ).format(url=kibana_url) + raise OpenShiftCheckException('FailedToConnect', error_fmt.format(error=msg)) + elif 'urlopen error [Errno -2] Name or service not known' in error: + msg = ( + 'Failed to resolve the Kibana hostname in {url}\n' + 'from the Ansible control host.\n' + 'Is DNS configured to resolve this Kibana hostname externally?' + ).format(url=kibana_url) + raise OpenShiftCheckException('FailedToResolve', error_fmt.format(error=msg)) + elif 'Expected success (200)' in error: + msg = ( + 'A request to Kibana at {url}\n' + 'returned the wrong error code:\n' + ' {error}\n' + 'This could mean that Kibana is malfunctioning, the hostname is\n' + 'resolving incorrectly, or other network issues.' + ).format(url=kibana_url, error=error) + raise OpenShiftCheckException('WrongReturnCode', error_fmt.format(error=msg)) + raise OpenShiftCheckException( + 'MiscRouteError', + 'Error validating the logging Kibana route externally:\n' + error + ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 43ba6c406..ecd8adb64 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -8,6 +8,16 @@ import os from openshift_checks import OpenShiftCheck, OpenShiftCheckException +class MissingComponentPods(OpenShiftCheckException): + """Raised when a component has no pods in the namespace.""" + pass + + +class CouldNotUseOc(OpenShiftCheckException): + """Raised when ocutil has a failure running oc.""" + pass + + class LoggingCheck(OpenShiftCheck): """Base class for OpenShift aggregated logging component checks""" @@ -15,10 +25,9 @@ class LoggingCheck(OpenShiftCheck): # run by itself. name = "logging" - logging_namespace = "logging" def is_active(self): - logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False) + logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False) return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master() def is_first_master(self): @@ -32,22 +41,24 @@ class LoggingCheck(OpenShiftCheck): def run(self): return {} - def get_pods_for_component(self, namespace, logging_component): - """Get all pods for a given component. Returns: list of pods for component, error string""" + def get_pods_for_component(self, logging_component): + """Get all pods for a given component. Returns: list of pods.""" pod_output = self.exec_oc( - namespace, "get pods -l component={} -o json".format(logging_component), [], ) try: - pods = json.loads(pod_output) - if not pods or not pods.get('items'): + pods = json.loads(pod_output) # raises ValueError if deserialize fails + if not pods or not pods.get('items'): # also a broken response, treat the same raise ValueError() except ValueError: - # successful run but non-parsing data generally means there were no pods in the namespace - return None, 'No pods were found for the "{}" logging component.'.format(logging_component) + # successful run but non-parsing data generally means there were no pods to be found + raise MissingComponentPods( + 'There are no "{}" component pods in the "{}" namespace.\n' + 'Is logging deployed?'.format(logging_component, self.logging_namespace()) + ) - return pods['items'], None + return pods['items'] @staticmethod def not_running_pods(pods): @@ -63,15 +74,19 @@ class LoggingCheck(OpenShiftCheck): ) ] - def exec_oc(self, namespace="logging", cmd_str="", extra_args=None): + def logging_namespace(self): + """Returns the namespace in which logging is configured to deploy.""" + return self.get_var("openshift_logging_namespace", default="logging") + + def exec_oc(self, cmd_str="", extra_args=None): """ Execute an 'oc' command in the remote host. Returns: output of command and namespace, - or raises OpenShiftCheckException on error + or raises CouldNotUseOc on error """ config_base = self.get_var("openshift", "common", "config_base") args = { - "namespace": namespace, + "namespace": self.logging_namespace(), "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), "cmd": cmd_str, "extra_args": list(extra_args) if extra_args else [], @@ -79,17 +94,16 @@ class LoggingCheck(OpenShiftCheck): result = self.execute_module("ocutil", args) if result.get("failed"): - msg = ( - 'Unexpected error using `oc` to validate the logging stack components.\n' - 'Error executing `oc {cmd}`:\n' - '{error}' - ).format(cmd=args['cmd'], error=result['result']) - if result['result'] == '[Errno 2] No such file or directory': - msg = ( + raise CouldNotUseOc( "This host is supposed to be a master but does not have the `oc` command where expected.\n" "Has an installation been run on this host yet?" ) - raise OpenShiftCheckException(msg) + + raise CouldNotUseOc( + 'Unexpected error using `oc` to validate the logging stack components.\n' + 'Error executing `oc {cmd}`:\n' + '{error}'.format(cmd=args['cmd'], error=result['result']) + ) return result.get("result", "") diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py index b24e88e05..d781db649 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py @@ -19,8 +19,6 @@ class LoggingIndexTime(LoggingCheck): name = "logging_index_time" tags = ["health", "logging"] - logging_namespace = "logging" - def run(self): """Add log entry by making unique request to Kibana. Check for unique entry in the ElasticSearch pod logs.""" try: @@ -28,29 +26,25 @@ class LoggingIndexTime(LoggingCheck): self.get_var("openshift_check_logging_index_timeout_seconds", default=ES_CMD_TIMEOUT_SECONDS) ) except ValueError: - return { - "failed": True, - "msg": ('Invalid value provided for "openshift_check_logging_index_timeout_seconds". ' - 'Value must be an integer representing an amount in seconds.'), - } + raise OpenShiftCheckException( + 'InvalidTimeout', + 'Invalid value provided for "openshift_check_logging_index_timeout_seconds". ' + 'Value must be an integer representing an amount in seconds.' + ) running_component_pods = dict() # get all component pods - self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace) for component, name in (['kibana', 'Kibana'], ['es', 'Elasticsearch']): - pods, error = self.get_pods_for_component(self.logging_namespace, component) - - if error: - msg = 'Unable to retrieve pods for the {} logging component: {}' - return {"failed": True, "changed": False, "msg": msg.format(name, error)} - + pods = self.get_pods_for_component(component) running_pods = self.running_pods(pods) if not running_pods: - msg = ('No {} pods in the "Running" state were found.' - 'At least one pod is required in order to perform this check.') - return {"failed": True, "changed": False, "msg": msg.format(name)} + raise OpenShiftCheckException( + component + 'NoRunningPods', + 'No {} pods in the "Running" state were found.' + 'At least one pod is required in order to perform this check.'.format(name) + ) running_component_pods[component] = running_pods @@ -65,8 +59,11 @@ class LoggingIndexTime(LoggingCheck): interval = 1 while not self.query_es_from_es(es_pod, uuid): if time.time() + interval > deadline: - msg = "expecting match in Elasticsearch for message with uuid {}, but no matches were found after {}s." - raise OpenShiftCheckException(msg.format(uuid, timeout_secs)) + raise OpenShiftCheckException( + "NoMatchFound", + "expecting match in Elasticsearch for message with uuid {}, " + "but no matches were found after {}s.".format(uuid, timeout_secs) + ) time.sleep(interval) def curl_kibana_with_uuid(self, kibana_pod): @@ -76,22 +73,23 @@ class LoggingIndexTime(LoggingCheck): exec_cmd = "exec {pod_name} -c kibana -- curl --max-time 30 -s http://localhost:5601/{uuid}" exec_cmd = exec_cmd.format(pod_name=pod_name, uuid=uuid) - error_str = self.exec_oc(self.logging_namespace, exec_cmd, []) + error_str = self.exec_oc(exec_cmd, []) try: error_code = json.loads(error_str)["statusCode"] - except KeyError: - msg = ('invalid response returned from Kibana request (Missing "statusCode" key):\n' - 'Command: {}\nResponse: {}').format(exec_cmd, error_str) - raise OpenShiftCheckException(msg) - except ValueError: - msg = ('invalid response returned from Kibana request (Non-JSON output):\n' - 'Command: {}\nResponse: {}').format(exec_cmd, error_str) - raise OpenShiftCheckException(msg) + except (KeyError, ValueError): + raise OpenShiftCheckException( + 'kibanaInvalidResponse', + 'invalid response returned from Kibana request:\n' + 'Command: {}\nResponse: {}'.format(exec_cmd, error_str) + ) if error_code != 404: - msg = 'invalid error code returned from Kibana request. Expecting error code "404", but got "{}" instead.' - raise OpenShiftCheckException(msg.format(error_code)) + raise OpenShiftCheckException( + 'kibanaInvalidReturnCode', + 'invalid error code returned from Kibana request.\n' + 'Expecting error code "404", but got "{}" instead.'.format(error_code) + ) return uuid @@ -105,17 +103,18 @@ class LoggingIndexTime(LoggingCheck): "--key /etc/elasticsearch/secret/admin-key " "https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}" ) - exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace, uuid=uuid) - result = self.exec_oc(self.logging_namespace, exec_cmd, []) + exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace(), uuid=uuid) + result = self.exec_oc(exec_cmd, []) try: count = json.loads(result)["count"] - except KeyError: - msg = 'invalid response from Elasticsearch query:\n"{}"\nMissing "count" key:\n{}' - raise OpenShiftCheckException(msg.format(exec_cmd, result)) - except ValueError: - msg = 'invalid response from Elasticsearch query:\n"{}"\nNon-JSON output:\n{}' - raise OpenShiftCheckException(msg.format(exec_cmd, result)) + except (KeyError, ValueError): + raise OpenShiftCheckException( + 'esInvalidResponse', + 'Invalid response from Elasticsearch query:\n' + ' {}\n' + 'Response was:\n{}'.format(exec_cmd, result) + ) return count diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 3b2c64e6a..e9bae60a3 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -29,10 +29,10 @@ class DockerHostMixin(object): """ Ensure that docker-related packages exist, but not on atomic hosts (which would not be able to install but should already have them). - Returns: msg, failed, changed + Returns: msg, failed """ if self.get_var("openshift", "common", "is_atomic"): - return "", False, False + return "", False # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: @@ -49,5 +49,5 @@ class DockerHostMixin(object): " {deps}\n{msg}" ).format(deps=',\n '.join(self.dependencies), msg=msg) failed = result.get("failed", False) or result.get("rc", 0) != 0 - changed = result.get("changed", False) - return msg, failed, changed + self.changed = result.get("changed", False) + return msg, failed diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 2d068be3d..f5161d6f5 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -6,7 +6,7 @@ from openshift_health_check import ActionModule, resolve_checks from openshift_checks import OpenShiftCheckException -def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None): +def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None, changed=False): """Returns a new class that is compatible with OpenShiftCheck for testing.""" _name, _tags = name, tags @@ -14,6 +14,7 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru class FakeCheck(object): name = _name tags = _tags or [] + changed = False def __init__(self, execute_module=None, task_vars=None, tmp=None): pass @@ -22,6 +23,7 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru return is_active def run(self): + self.changed = changed if run_exception is not None: raise run_exception return run_return @@ -135,14 +137,15 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch): - check_return_value = {'ok': 'test', 'changed': True} - check_class = fake_check(run_return=check_return_value) + check_return_value = {'ok': 'test'} + check_class = fake_check(run_return=check_return_value, changed=True) monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) assert result['checks']['fake_check'] == check_return_value + assert changed(result['checks']['fake_check']) assert not failed(result) assert changed(result) assert not skipped(result) @@ -165,7 +168,7 @@ def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): exception_msg = 'fake check has an exception' run_exception = OpenShiftCheckException(exception_msg) - check_class = fake_check(run_exception=run_exception) + check_class = fake_check(run_exception=run_exception, changed=True) monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) @@ -173,7 +176,8 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): assert failed(result['checks']['fake_check'], msg_has=exception_msg) assert failed(result, msg_has=['failed']) - assert not changed(result) + assert changed(result['checks']['fake_check']) + assert changed(result) assert not skipped(result) diff --git a/roles/openshift_health_checker/test/curator_test.py b/roles/openshift_health_checker/test/curator_test.py index ae108c96e..62c680b74 100644 --- a/roles/openshift_health_checker/test/curator_test.py +++ b/roles/openshift_health_checker/test/curator_test.py @@ -1,22 +1,6 @@ import pytest -from openshift_checks.logging.curator import Curator - - -def canned_curator(exec_oc=None): - """Create a Curator check object with canned exec_oc method""" - check = Curator("dummy") # fails if a module is actually invoked - if exec_oc: - check._exec_oc = exec_oc - return check - - -def assert_error(error, expect_error): - if expect_error: - assert error - assert expect_error in error - else: - assert not error +from openshift_checks.logging.curator import Curator, OpenShiftCheckException plain_curator_pod = { @@ -44,25 +28,30 @@ not_running_curator_pod = { } +def test_get_curator_pods(): + check = Curator() + check.get_pods_for_component = lambda *_: [plain_curator_pod] + result = check.run() + assert "failed" not in result or not result["failed"] + + @pytest.mark.parametrize('pods, expect_error', [ ( [], - "no Curator pods", - ), - ( - [plain_curator_pod], - None, + 'MissingComponentPods', ), ( [not_running_curator_pod], - "not currently in a running state", + 'CuratorNotRunning', ), ( [plain_curator_pod, plain_curator_pod], - "more than one Curator pod", + 'TooManyCurators', ), ]) -def test_get_curator_pods(pods, expect_error): - check = canned_curator() - error = check.check_curator(pods) - assert_error(error, expect_error) +def test_get_curator_pods_fail(pods, expect_error): + check = Curator() + check.get_pods_for_component = lambda *_: pods + with pytest.raises(OpenShiftCheckException) as excinfo: + check.run() + assert excinfo.value.name == expect_error diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index 5720eeacf..f4fd2dfed 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -20,12 +20,24 @@ def test_is_active(group_names, is_active): assert DiskAvailability(None, task_vars).is_active() == is_active -@pytest.mark.parametrize('ansible_mounts,extra_words', [ - ([], ['none']), # empty ansible_mounts - ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths - ([{'mount': '/var'}], ['/var']), # missing size_available +@pytest.mark.parametrize('desc, ansible_mounts, expect_chunks', [ + ( + 'empty ansible_mounts', + [], + ['determine mount point', 'none'], + ), + ( + 'missing relevant mount paths', + [{'mount': '/mnt'}], + ['determine mount point', '/mnt'], + ), + ( + 'missing size_available', + [{'mount': '/var'}, {'mount': '/usr'}, {'mount': '/tmp'}], + ['missing', 'size_available'], + ), ]) -def test_cannot_determine_available_disk(ansible_mounts, extra_words): +def test_cannot_determine_available_disk(desc, ansible_mounts, expect_chunks): task_vars = dict( group_names=['masters'], ansible_mounts=ansible_mounts, @@ -34,8 +46,8 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: DiskAvailability(fake_execute_module, task_vars).run() - for word in 'determine disk availability'.split() + extra_words: - assert word in str(excinfo.value) + for chunk in expect_chunks: + assert chunk in str(excinfo.value) @pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [ @@ -97,7 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib assert not result.get('failed', False) -@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [ +@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,expect_chunks', [ ( 'test with no space available', ['masters'], @@ -164,7 +176,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['0.0 GB'], ), ], ids=lambda argval: argval[0]) -def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words): +def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, expect_chunks): task_vars = dict( group_names=group_names, openshift_check_min_host_disk_gb=configured_min, @@ -174,8 +186,8 @@ def test_fails_with_insufficient_disk_space(name, group_names, configured_min, a result = DiskAvailability(fake_execute_module, task_vars).run() assert result['failed'] - for word in 'below recommended'.split() + extra_words: - assert word in result.get('msg', '') + for chunk in 'below recommended'.split() + expect_chunks: + assert chunk in result.get('msg', '') @pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [ diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py index 67408609a..09bacd9ac 100644 --- a/roles/openshift_health_checker/test/elasticsearch_test.py +++ b/roles/openshift_health_checker/test/elasticsearch_test.py @@ -1,17 +1,26 @@ import pytest import json -from openshift_checks.logging.elasticsearch import Elasticsearch +from openshift_checks.logging.elasticsearch import Elasticsearch, OpenShiftCheckExceptionList + task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) -def assert_error(error, expect_error): - if expect_error: - assert error - assert expect_error in error - else: - assert not error +def canned_elasticsearch(task_vars=None, exec_oc=None): + """Create an Elasticsearch check object with stubbed exec_oc method""" + check = Elasticsearch(None, task_vars or {}) + if exec_oc: + check.exec_oc = exec_oc + return check + + +def assert_error_in_list(expect_err, errorlist): + assert any(err.name == expect_err for err in errorlist), "{} in {}".format(str(expect_err), str(errorlist)) + + +def pods_by_name(pods): + return {pod['metadata']['name']: pod for pod in pods} plain_es_pod = { @@ -19,6 +28,7 @@ plain_es_pod = { "labels": {"component": "es", "deploymentconfig": "logging-es"}, "name": "logging-es", }, + "spec": {}, "status": { "conditions": [{"status": "True", "type": "Ready"}], "containerStatuses": [{"ready": True}], @@ -32,6 +42,7 @@ split_es_pod = { "labels": {"component": "es", "deploymentconfig": "logging-es-2"}, "name": "logging-es-2", }, + "spec": {}, "status": { "conditions": [{"status": "True", "type": "Ready"}], "containerStatuses": [{"ready": True}], @@ -40,12 +51,28 @@ split_es_pod = { "_test_master_name_str": "name logging-es-2", } +unready_es_pod = { + "metadata": { + "labels": {"component": "es", "deploymentconfig": "logging-es-3"}, + "name": "logging-es-3", + }, + "spec": {}, + "status": { + "conditions": [{"status": "False", "type": "Ready"}], + "containerStatuses": [{"ready": False}], + "podIP": "10.10.10.10", + }, + "_test_master_name_str": "BAD_NAME_RESPONSE", +} + def test_check_elasticsearch(): - assert 'No logging Elasticsearch pods' in Elasticsearch().check_elasticsearch([]) + with pytest.raises(OpenShiftCheckExceptionList) as excinfo: + canned_elasticsearch().check_elasticsearch([]) + assert_error_in_list('NoRunningPods', excinfo.value) # canned oc responses to match so all the checks pass - def _exec_oc(ns, cmd, args): + def exec_oc(cmd, args): if '_cat/master' in cmd: return 'name logging-es' elif '/_nodes' in cmd: @@ -57,35 +84,41 @@ def test_check_elasticsearch(): else: raise Exception(cmd) - check = Elasticsearch(None, {}) - check.exec_oc = _exec_oc - assert not check.check_elasticsearch([plain_es_pod]) + check = canned_elasticsearch({}, exec_oc) + check.get_pods_for_component = lambda *_: [plain_es_pod] + assert {} == check.run() -def pods_by_name(pods): - return {pod['metadata']['name']: pod for pod in pods} +def test_check_running_es_pods(): + pods, errors = Elasticsearch().running_elasticsearch_pods([plain_es_pod, unready_es_pod]) + assert plain_es_pod in pods + assert_error_in_list('PodNotRunning', errors) + + +def test_check_elasticsearch_masters(): + pods = [plain_es_pod] + check = canned_elasticsearch(task_vars_config_base, lambda *_: plain_es_pod['_test_master_name_str']) + assert not check.check_elasticsearch_masters(pods_by_name(pods)) @pytest.mark.parametrize('pods, expect_error', [ ( [], - 'No logging Elasticsearch masters', + 'NoMasterFound', ), ( - [plain_es_pod], - None, + [unready_es_pod], + 'NoMasterName', ), ( [plain_es_pod, split_es_pod], - 'Found multiple Elasticsearch masters', + 'SplitBrainMasters', ), ]) -def test_check_elasticsearch_masters(pods, expect_error): +def test_check_elasticsearch_masters_error(pods, expect_error): test_pods = list(pods) - check = Elasticsearch(None, task_vars_config_base) - check.execute_module = lambda cmd, args: {'result': test_pods.pop(0)['_test_master_name_str']} - errors = check._check_elasticsearch_masters(pods_by_name(pods)) - assert_error(''.join(errors), expect_error) + check = canned_elasticsearch(task_vars_config_base, lambda *_: test_pods.pop(0)['_test_master_name_str']) + assert_error_in_list(expect_error, check.check_elasticsearch_masters(pods_by_name(pods))) es_node_list = { @@ -95,83 +128,76 @@ es_node_list = { }}} +def test_check_elasticsearch_node_list(): + check = canned_elasticsearch(task_vars_config_base, lambda *_: json.dumps(es_node_list)) + assert not check.check_elasticsearch_node_list(pods_by_name([plain_es_pod])) + + @pytest.mark.parametrize('pods, node_list, expect_error', [ ( [], {}, - 'No logging Elasticsearch masters', - ), - ( - [plain_es_pod], - es_node_list, - None, + 'MissingComponentPods', ), ( [plain_es_pod], {}, # empty list of nodes triggers KeyError - "Failed to query", + 'MissingNodeList', ), ( [split_es_pod], es_node_list, - 'does not correspond to any known ES pod', + 'EsPodNodeMismatch', ), ]) -def test_check_elasticsearch_node_list(pods, node_list, expect_error): - check = Elasticsearch(None, task_vars_config_base) - check.execute_module = lambda cmd, args: {'result': json.dumps(node_list)} +def test_check_elasticsearch_node_list_errors(pods, node_list, expect_error): + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list)) + assert_error_in_list(expect_error, check.check_elasticsearch_node_list(pods_by_name(pods))) - errors = check._check_elasticsearch_node_list(pods_by_name(pods)) - assert_error(''.join(errors), expect_error) + +def test_check_elasticsearch_cluster_health(): + test_health_data = [{"status": "green"}] + check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0))) + assert not check.check_es_cluster_health(pods_by_name([plain_es_pod])) @pytest.mark.parametrize('pods, health_data, expect_error', [ ( [plain_es_pod], - [{"status": "green"}], - None, - ), - ( - [plain_es_pod], [{"no-status": "should bomb"}], - 'Could not retrieve cluster health status', + 'BadEsResponse', ), ( [plain_es_pod, split_es_pod], [{"status": "green"}, {"status": "red"}], - 'Elasticsearch cluster health status is RED', + 'EsClusterHealthRed', ), ]) -def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): +def test_check_elasticsearch_cluster_health_errors(pods, health_data, expect_error): test_health_data = list(health_data) - check = Elasticsearch(None, task_vars_config_base) - check.execute_module = lambda cmd, args: {'result': json.dumps(test_health_data.pop(0))} + check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0))) + assert_error_in_list(expect_error, check.check_es_cluster_health(pods_by_name(pods))) - errors = check._check_es_cluster_health(pods_by_name(pods)) - assert_error(''.join(errors), expect_error) + +def test_check_elasticsearch_diskspace(): + check = canned_elasticsearch(exec_oc=lambda *_: 'IUse% Use%\n 3% 4%\n') + assert not check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod])) @pytest.mark.parametrize('disk_data, expect_error', [ ( 'df: /elasticsearch/persistent: No such file or directory\n', - 'Could not retrieve storage usage', - ), - ( - 'IUse% Use%\n 3% 4%\n', - None, + 'BadDfResponse', ), ( 'IUse% Use%\n 95% 40%\n', - 'Inode percent usage on the storage volume', + 'InodeUsageTooHigh', ), ( 'IUse% Use%\n 3% 94%\n', - 'Disk percent usage on the storage volume', + 'DiskUsageTooHigh', ), ]) -def test_check_elasticsearch_diskspace(disk_data, expect_error): - check = Elasticsearch(None, task_vars_config_base) - check.execute_module = lambda cmd, args: {'result': disk_data} - - errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod])) - assert_error(''.join(errors), expect_error) +def test_check_elasticsearch_diskspace_errors(disk_data, expect_error): + check = canned_elasticsearch(exec_oc=lambda *_: disk_data) + assert_error_in_list(expect_error, check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))) diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py index e3d6706fa..d3aae98f2 100644 --- a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py +++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py @@ -1,7 +1,8 @@ import pytest from collections import namedtuple -from openshift_checks.etcd_imagedata_size import EtcdImageDataSize, OpenShiftCheckException +from openshift_checks.etcd_imagedata_size import EtcdImageDataSize +from openshift_checks import OpenShiftCheckException from etcdkeysize import check_etcd_key_size @@ -56,7 +57,7 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: check.run() - for word in 'determine valid etcd mountpath'.split() + extra_words: + for word in ['Unable to determine mount point'] + extra_words: assert word in str(excinfo.value) diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py index 0b255136e..077cea3ea 100644 --- a/roles/openshift_health_checker/test/etcd_volume_test.py +++ b/roles/openshift_health_checker/test/etcd_volume_test.py @@ -1,6 +1,7 @@ import pytest -from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException +from openshift_checks.etcd_volume import EtcdVolume +from openshift_checks import OpenShiftCheckException @pytest.mark.parametrize('ansible_mounts,extra_words', [ @@ -15,7 +16,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: EtcdVolume(fake_execute_module, task_vars).run() - for word in 'Unable to find etcd storage mount point'.split() + extra_words: + for word in ['Unable to determine mount point'] + extra_words: assert word in str(excinfo.value) diff --git a/roles/openshift_health_checker/test/fluentd_config_test.py b/roles/openshift_health_checker/test/fluentd_config_test.py index 8a2d8b72b..10db253bc 100644 --- a/roles/openshift_health_checker/test/fluentd_config_test.py +++ b/roles/openshift_health_checker/test/fluentd_config_test.py @@ -198,12 +198,9 @@ def test_check_logging_config_master(name, pods, logging_driver, extra_words): ), ) - def get_pods(namespace, logging_component): - return pods, None - check = FluentdConfig(execute_module, task_vars) check.execute_module = execute_module - check.get_pods_for_component = get_pods + check.get_pods_for_component = lambda _: pods error = check.check_logging_config() assert error is None @@ -283,12 +280,9 @@ def test_check_logging_config_master_failed(name, pods, logging_driver, words): ), ) - def get_pods(namespace, logging_component): - return pods, None - check = FluentdConfig(execute_module, task_vars) check.execute_module = execute_module - check.get_pods_for_component = get_pods + check.get_pods_for_component = lambda _: pods error = check.check_logging_config() assert error is not None @@ -343,11 +337,8 @@ def test_check_logging_config_master_fails_on_unscheduled_deployment(name, pods, ), ) - def get_pods(namespace, logging_component): - return pods, None - check = FluentdConfig(execute_module, task_vars) - check.get_pods_for_component = get_pods + check.get_pods_for_component = lambda _: pods with pytest.raises(OpenShiftCheckException) as error: check.check_logging_config() diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py index a84d89cef..e7bf9818b 100644 --- a/roles/openshift_health_checker/test/fluentd_test.py +++ b/roles/openshift_health_checker/test/fluentd_test.py @@ -1,15 +1,11 @@ import pytest import json -from openshift_checks.logging.fluentd import Fluentd +from openshift_checks.logging.fluentd import Fluentd, OpenShiftCheckExceptionList, OpenShiftCheckException -def assert_error(error, expect_error): - if expect_error: - assert error - assert expect_error in error - else: - assert not error +def assert_error_in_list(expect_err, errorlist): + assert any(err.name == expect_err for err in errorlist), "{} in {}".format(str(expect_err), str(errorlist)) fluentd_pod_node1 = { @@ -57,45 +53,60 @@ fluentd_node3_unlabeled = { } +def test_get_fluentd_pods(): + check = Fluentd() + check.exec_oc = lambda *_: json.dumps(dict(items=[fluentd_node1])) + check.get_pods_for_component = lambda *_: [fluentd_pod_node1] + assert not check.run() + + @pytest.mark.parametrize('pods, nodes, expect_error', [ ( [], [], - 'No nodes appear to be defined', + 'NoNodesDefined', ), ( [], [fluentd_node3_unlabeled], - 'There are no nodes with the fluentd label', + 'NoNodesLabeled', ), ( [], [fluentd_node1, fluentd_node3_unlabeled], - 'Fluentd will not aggregate logs from these nodes.', + 'NodesUnlabeled', ), ( [], [fluentd_node2], - "nodes are supposed to have a Fluentd pod but do not", + 'MissingFluentdPod', ), ( [fluentd_pod_node1, fluentd_pod_node1], [fluentd_node1], - 'more Fluentd pods running than nodes labeled', + 'TooManyFluentdPods', ), ( [fluentd_pod_node2_down], [fluentd_node2], - "Fluentd pods are supposed to be running", - ), - ( - [fluentd_pod_node1], - [fluentd_node1], - None, + 'FluentdNotRunning', ), ]) -def test_get_fluentd_pods(pods, nodes, expect_error): +def test_get_fluentd_pods_errors(pods, nodes, expect_error): + check = Fluentd() + check.exec_oc = lambda *_: json.dumps(dict(items=nodes)) + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.check_fluentd(pods) + if isinstance(excinfo.value, OpenShiftCheckExceptionList): + assert_error_in_list(expect_error, excinfo.value) + else: + assert expect_error == excinfo.value.name + + +def test_bad_oc_node_list(): check = Fluentd() - check.exec_oc = lambda ns, cmd, args: json.dumps(dict(items=nodes)) - error = check.check_fluentd(pods) - assert_error(error, expect_error) + check.exec_oc = lambda *_: "this isn't even json" + with pytest.raises(OpenShiftCheckException) as excinfo: + check.get_nodes_by_name() + assert 'BadOcNodeList' == excinfo.value.name diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py index 0bf492511..04a5e89c4 100644 --- a/roles/openshift_health_checker/test/kibana_test.py +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -8,15 +8,7 @@ except ImportError: from urllib.error import HTTPError, URLError import urllib.request as urllib2 -from openshift_checks.logging.kibana import Kibana - - -def assert_error(error, expect_error): - if expect_error: - assert error - assert expect_error in error - else: - assert not error +from openshift_checks.logging.kibana import Kibana, OpenShiftCheckException plain_kibana_pod = { @@ -41,39 +33,45 @@ not_running_kibana_pod = { } +def test_check_kibana(): + # should run without exception: + Kibana().check_kibana([plain_kibana_pod]) + + @pytest.mark.parametrize('pods, expect_error', [ ( [], - "There are no Kibana pods deployed", - ), - ( - [plain_kibana_pod], - None, + "MissingComponentPods", ), ( [not_running_kibana_pod], - "No Kibana pod is in a running state", + "NoRunningPods", ), ( [plain_kibana_pod, not_running_kibana_pod], - "The following Kibana pods are not currently in a running state", + "PodNotRunning", ), ]) -def test_check_kibana(pods, expect_error): - check = Kibana() - error = check.check_kibana(pods) - assert_error(error, expect_error) +def test_check_kibana_error(pods, expect_error): + with pytest.raises(OpenShiftCheckException) as excinfo: + Kibana().check_kibana(pods) + assert expect_error == excinfo.value.name -@pytest.mark.parametrize('route, expect_url, expect_error', [ +@pytest.mark.parametrize('comment, route, expect_error', [ ( + "No route returned", None, - None, - 'no_route_exists', + "no_route_exists", ), - # test route with no ingress ( + "broken route response", + {"status": {}}, + "get_route_failed", + ), + ( + "route with no ingress", { "metadata": { "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, @@ -86,12 +84,11 @@ def test_check_kibana(pods, expect_error): "host": "hostname", } }, - None, - 'route_not_accepted', + "route_not_accepted", ), - # test route with no host ( + "route with no host", { "metadata": { "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, @@ -104,12 +101,21 @@ def test_check_kibana(pods, expect_error): }, "spec": {}, }, - None, - 'route_missing_host', + "route_missing_host", ), +]) +def test_get_kibana_url_error(comment, route, expect_error): + check = Kibana() + check.exec_oc = lambda *_: json.dumps(route) if route else "" + + with pytest.raises(OpenShiftCheckException) as excinfo: + check._get_kibana_url() + assert excinfo.value.name == expect_error - # test route that looks fine + +@pytest.mark.parametrize('comment, route, expect_url', [ ( + "test route that looks fine", { "metadata": { "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, @@ -125,62 +131,57 @@ def test_check_kibana(pods, expect_error): }, }, "https://hostname/", - None, ), ]) -def test_get_kibana_url(route, expect_url, expect_error): +def test_get_kibana_url(comment, route, expect_url): check = Kibana() - check.exec_oc = lambda ns, cmd, args: json.dumps(route) if route else "" - - url, error = check._get_kibana_url() - if expect_url: - assert url == expect_url - else: - assert not url - if expect_error: - assert error == expect_error - else: - assert not error + check.exec_oc = lambda *_: json.dumps(route) + assert expect_url == check._get_kibana_url() @pytest.mark.parametrize('exec_result, expect', [ ( 'urlopen error [Errno 111] Connection refused', - 'at least one router routing to it?', + 'FailedToConnectInternal', ), ( 'urlopen error [Errno -2] Name or service not known', - 'DNS configured for the Kibana hostname?', + 'FailedToResolveInternal', ), ( 'Status code was not [302]: HTTP Error 500: Server error', - 'did not return the correct status code', + 'WrongReturnCodeInternal', ), ( 'bork bork bork', - 'bork bork bork', # should pass through + 'MiscRouteErrorInternal', ), ]) def test_verify_url_internal_failure(exec_result, expect): check = Kibana(execute_module=lambda *_: dict(failed=True, msg=exec_result)) - check._get_kibana_url = lambda: ('url', None) + check._get_kibana_url = lambda: 'url' - error = check._check_kibana_route() - assert_error(error, expect) + with pytest.raises(OpenShiftCheckException) as excinfo: + check.check_kibana_route() + assert expect == excinfo.value.name @pytest.mark.parametrize('lib_result, expect', [ ( - HTTPError('url', 500, "it broke", hdrs=None, fp=None), - 'it broke', + HTTPError('url', 500, 'it broke', hdrs=None, fp=None), + 'MiscRouteError', + ), + ( + URLError('urlopen error [Errno 111] Connection refused'), + 'FailedToConnect', ), ( - URLError('it broke'), - 'it broke', + URLError('urlopen error [Errno -2] Name or service not known'), + 'FailedToResolve', ), ( 302, - 'returned the wrong error code', + 'WrongReturnCode', ), ( 200, @@ -204,8 +205,40 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch): monkeypatch.setattr(urllib2, 'urlopen', urlopen) check = Kibana() - check._get_kibana_url = lambda: ('url', None) + check._get_kibana_url = lambda: 'url' check._verify_url_internal = lambda url: None - error = check._check_kibana_route() - assert_error(error, expect) + if not expect: + check.check_kibana_route() + return + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.check_kibana_route() + assert expect == excinfo.value.name + + +def test_verify_url_external_skip(): + check = Kibana(lambda *_: {}, dict(openshift_check_efk_kibana_external="false")) + check._get_kibana_url = lambda: 'url' + check.check_kibana_route() + + +# this is kind of silly but it adds coverage for the run() method... +def test_run(): + pods = ["foo"] + ran = dict(check_kibana=False, check_route=False) + + def check_kibana(pod_list): + ran["check_kibana"] = True + assert pod_list == pods + + def check_kibana_route(): + ran["check_route"] = True + + check = Kibana() + check.get_pods_for_component = lambda *_: pods + check.check_kibana = check_kibana + check.check_kibana_route = check_kibana_route + + check.run() + assert ran["check_kibana"] and ran["check_route"] diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py index 6f1697ee6..1a1c190f6 100644 --- a/roles/openshift_health_checker/test/logging_check_test.py +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -1,18 +1,14 @@ import pytest import json -from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException +from openshift_checks.logging.logging import LoggingCheck, MissingComponentPods, CouldNotUseOc task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) -logging_namespace = "logging" - - -def canned_loggingcheck(exec_oc=None): +def canned_loggingcheck(exec_oc=None, execute_module=None): """Create a LoggingCheck object with canned exec_oc method""" - check = LoggingCheck() # fails if a module is actually invoked - check.logging_namespace = 'logging' + check = LoggingCheck(execute_module) if exec_oc: check.exec_oc = exec_oc return check @@ -97,8 +93,8 @@ def test_oc_failure(problem, expect): check = LoggingCheck(execute_module, task_vars_config_base) - with pytest.raises(OpenShiftCheckException) as excinfo: - check.exec_oc(logging_namespace, 'get foo', []) + with pytest.raises(CouldNotUseOc) as excinfo: + check.exec_oc('get foo', []) assert expect in str(excinfo) @@ -124,25 +120,32 @@ def test_is_active(groups, logging_deployed, is_active): assert LoggingCheck(None, task_vars).is_active() == is_active -@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [ +@pytest.mark.parametrize('pod_output, expect_pods', [ + ( + json.dumps({'items': [plain_es_pod]}), + [plain_es_pod], + ), +]) +def test_get_pods_for_component(pod_output, expect_pods): + check = canned_loggingcheck(lambda *_: pod_output) + pods = check.get_pods_for_component("es") + assert pods == expect_pods + + +@pytest.mark.parametrize('exec_oc_output, expect_error', [ ( 'No resources found.', - None, - 'No pods were found for the "es"', + MissingComponentPods, ), ( - json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}), - [plain_es_pod], - None, + '{"items": null}', + MissingComponentPods, ), ]) -def test_get_pods_for_component(pod_output, expect_pods, expect_error): - check = canned_loggingcheck(lambda namespace, cmd, args: pod_output) - pods, error = check.get_pods_for_component( - logging_namespace, - "es", - ) - assert_error(error, expect_error) +def test_get_pods_for_component_fail(exec_oc_output, expect_error): + check = canned_loggingcheck(lambda *_: exec_oc_output) + with pytest.raises(expect_error): + check.get_pods_for_component("es") @pytest.mark.parametrize('name, pods, expected_pods', [ @@ -159,7 +162,7 @@ def test_get_pods_for_component(pod_output, expect_pods, expect_error): ], ids=lambda argvals: argvals[0]) def test_get_not_running_pods_no_container_status(name, pods, expected_pods): - check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '') + check = canned_loggingcheck(lambda *_: '') result = check.not_running_pods(pods) assert result == expected_pods diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py index 178d7cd84..22566b295 100644 --- a/roles/openshift_health_checker/test/logging_index_time_test.py +++ b/roles/openshift_health_checker/test/logging_index_time_test.py @@ -69,7 +69,29 @@ def test_check_running_pods(pods, expect_pods): assert pods == expect_pods -@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [ +def test_bad_config_param(): + with pytest.raises(OpenShiftCheckException) as error: + LoggingIndexTime(task_vars=dict(openshift_check_logging_index_timeout_seconds="foo")).run() + assert 'InvalidTimeout' == error.value.name + + +def test_no_running_pods(): + check = LoggingIndexTime() + check.get_pods_for_component = lambda *_: [not_running_kibana_pod] + with pytest.raises(OpenShiftCheckException) as error: + check.run() + assert 'kibanaNoRunningPods' == error.value.name + + +def test_with_running_pods(): + check = LoggingIndexTime() + check.get_pods_for_component = lambda *_: [plain_running_kibana_pod, plain_running_elasticsearch_pod] + check.curl_kibana_with_uuid = lambda *_: SAMPLE_UUID + check.wait_until_cmd_or_err = lambda *_: None + assert not check.run().get("failed") + + +@pytest.mark.parametrize('name, json_response, uuid, timeout', [ ( 'valid count in response', { @@ -77,94 +99,72 @@ def test_check_running_pods(pods, expect_pods): }, SAMPLE_UUID, 0.001, - [], ), ], ids=lambda argval: argval[0]) -def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout, extra_words): +def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout): check = canned_loggingindextime(lambda *_: json.dumps(json_response)) check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout) -@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [ +@pytest.mark.parametrize('name, json_response, timeout, expect_error', [ ( 'invalid json response', { "invalid_field": 1, }, - SAMPLE_UUID, 0.001, - ["invalid response", "Elasticsearch"], + 'esInvalidResponse', ), ( 'empty response', {}, - SAMPLE_UUID, 0.001, - ["invalid response", "Elasticsearch"], + 'esInvalidResponse', ), ( 'valid response but invalid match count', { "count": 0, }, - SAMPLE_UUID, 0.005, - ["expecting match", SAMPLE_UUID, "0.005s"], + 'NoMatchFound', ) ], ids=lambda argval: argval[0]) -def test_wait_until_cmd_or_err(name, json_response, uuid, timeout, extra_words): +def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error): check = canned_loggingindextime(lambda *_: json.dumps(json_response)) with pytest.raises(OpenShiftCheckException) as error: - check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout) + check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, SAMPLE_UUID, timeout) - for word in extra_words: - assert word in str(error) + assert expect_error == error.value.name -@pytest.mark.parametrize('name, json_response, uuid, extra_words', [ - ( - 'correct response code, found unique id is returned', - { - "statusCode": 404, - }, - "sample unique id", - ["sample unique id"], - ), -], ids=lambda argval: argval[0]) -def test_curl_kibana_with_uuid(name, json_response, uuid, extra_words): - check = canned_loggingindextime(lambda *_: json.dumps(json_response)) - check.generate_uuid = lambda: uuid - - result = check.curl_kibana_with_uuid(plain_running_kibana_pod) - - for word in extra_words: - assert word in result +def test_curl_kibana_with_uuid(): + check = canned_loggingindextime(lambda *_: json.dumps({"statusCode": 404})) + check.generate_uuid = lambda: SAMPLE_UUID + assert SAMPLE_UUID == check.curl_kibana_with_uuid(plain_running_kibana_pod) -@pytest.mark.parametrize('name, json_response, uuid, extra_words', [ +@pytest.mark.parametrize('name, json_response, expect_error', [ ( 'invalid json response', { "invalid_field": "invalid", }, - SAMPLE_UUID, - ["invalid response returned", 'Missing "statusCode" key'], + 'kibanaInvalidResponse', ), ( 'wrong error code in response', { "statusCode": 500, }, - SAMPLE_UUID, - ["Expecting error code", "500"], + 'kibanaInvalidReturnCode', ), ], ids=lambda argval: argval[0]) -def test_failed_curl_kibana_with_uuid(name, json_response, uuid, extra_words): +def test_failed_curl_kibana_with_uuid(name, json_response, expect_error): check = canned_loggingindextime(lambda *_: json.dumps(json_response)) - check.generate_uuid = lambda: uuid + check.generate_uuid = lambda: SAMPLE_UUID with pytest.raises(OpenShiftCheckException) as error: check.curl_kibana_with_uuid(plain_running_kibana_pod) - for word in extra_words: - assert word in str(error) + assert expect_error == error.value.name diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py index 43aa875f4..789784c77 100644 --- a/roles/openshift_health_checker/test/openshift_check_test.py +++ b/roles/openshift_health_checker/test/openshift_check_test.py @@ -81,6 +81,7 @@ def dummy_check(task_vars): @pytest.mark.parametrize("keys,expected", [ (("foo",), 42), (("bar", "baz"), "openshift"), + (("bar.baz",), "openshift"), ]) def test_get_var_ok(task_vars, keys, expected): assert dummy_check(task_vars).get_var(*keys) == expected @@ -94,3 +95,24 @@ def test_get_var_error(task_vars, missing_keys): def test_get_var_default(task_vars, missing_keys): default = object() assert dummy_check(task_vars).get_var(*missing_keys, default=default) == default + + +@pytest.mark.parametrize("keys, convert, expected", [ + (("foo",), str, "42"), + (("foo",), float, 42.0), + (("bar", "baz"), bool, False), +]) +def test_get_var_convert(task_vars, keys, convert, expected): + assert dummy_check(task_vars).get_var(*keys, convert=convert) == expected + + +@pytest.mark.parametrize("keys, convert", [ + (("bar", "baz"), int), + (("bar.baz"), float), + (("foo"), "bogus"), + (("foo"), lambda a, b: 1), + (("foo"), lambda a: 1 / 0), +]) +def test_get_var_convert_error(task_vars, keys, convert): + with pytest.raises(OpenShiftCheckException): + dummy_check(task_vars).get_var(*keys, convert=convert) diff --git a/roles/openshift_hosted/defaults/main.yml b/roles/openshift_hosted/defaults/main.yml index 0391e5602..13cbfb14e 100644 --- a/roles/openshift_hosted/defaults/main.yml +++ b/roles/openshift_hosted/defaults/main.yml @@ -1,4 +1,10 @@ --- +r_openshift_hosted_router_firewall_enabled: True +r_openshift_hosted_router_use_firewalld: False + +r_openshift_hosted_registry_firewall_enabled: True +r_openshift_hosted_registry_use_firewalld: False + registry_volume_claim: 'registry-claim' openshift_hosted_router_edits: @@ -26,12 +32,15 @@ openshift_hosted_routers: - 443:443 certificate: "{{ openshift_hosted_router_certificate | default({}) }}" - openshift_hosted_router_certificate: {} openshift_hosted_registry_cert_expire_days: 730 openshift_hosted_router_create_certificate: True -os_firewall_allow: +r_openshift_hosted_router_os_firewall_deny: [] +r_openshift_hosted_router_os_firewall_allow: [] + +r_openshift_hosted_registry_os_firewall_deny: [] +r_openshift_hosted_registry_os_firewall_allow: - service: Docker Registry Port port: 5000/tcp - when: openshift.common.use_calico | bool + cond: "{{ r_openshift_hosted_use_calico }}" diff --git a/roles/openshift_hosted/meta/main.yml b/roles/openshift_hosted/meta/main.yml index 9e3f37130..28fd396d6 100644 --- a/roles/openshift_hosted/meta/main.yml +++ b/roles/openshift_hosted/meta/main.yml @@ -15,8 +15,4 @@ dependencies: - role: openshift_cli - role: openshift_hosted_facts - role: lib_openshift -- role: os_firewall - os_firewall_allow: - - service: Docker Registry Port - port: 5000/tcp - when: openshift.common.use_calico | bool +- role: lib_os_firewall diff --git a/roles/openshift_hosted/tasks/registry/firewall.yml b/roles/openshift_hosted/tasks/registry/firewall.yml new file mode 100644 index 000000000..775b7d6d7 --- /dev/null +++ b/roles/openshift_hosted/tasks/registry/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_hosted_registry_firewall_enabled | bool and not r_openshift_hosted_registry_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_registry_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_registry_os_firewall_deny }}" + +- when: r_openshift_hosted_registry_firewall_enabled | bool and r_openshift_hosted_registry_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_registry_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_registry_os_firewall_deny }}" diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index b946ec8ca..dcd9c87fc 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -1,6 +1,10 @@ --- -- block: +- name: setup firewall + include: firewall.yml + static: yes +- when: openshift.hosted.registry.replicas | default(none) is none + block: - name: Retrieve list of openshift nodes matching registry selector oc_obj: state: list @@ -28,7 +32,6 @@ l_default_replicas: "{{ l_node_count if openshift.hosted.registry.storage.kind | default(none) is not none else 1 }}" when: l_node_count | int > 0 - when: openshift.hosted.registry.replicas | default(none) is none - name: set openshift_hosted facts set_fact: diff --git a/roles/openshift_hosted/tasks/router/firewall.yml b/roles/openshift_hosted/tasks/router/firewall.yml new file mode 100644 index 000000000..ff90f3372 --- /dev/null +++ b/roles/openshift_hosted/tasks/router/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_hosted_router_firewall_enabled | bool and not r_openshift_hosted_router_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_router_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_router_os_firewall_deny }}" + +- when: r_openshift_hosted_router_firewall_enabled | bool and r_openshift_hosted_router_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_router_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_hosted_router_os_firewall_deny }}" diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index dd485a64a..72a1ead80 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -1,4 +1,8 @@ --- +- name: setup firewall + include: firewall.yml + static: yes + - name: Retrieve list of openshift nodes matching router selector oc_obj: state: list diff --git a/roles/openshift_hosted_logging/handlers/main.yml b/roles/openshift_hosted_logging/handlers/main.yml index ffb812271..d7e83fe9a 100644 --- a/roles/openshift_hosted_logging/handlers/main.yml +++ b/roles/openshift_hosted_logging/handlers/main.yml @@ -1,9 +1,4 @@ --- -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and # wait_for port doesn't provide health information. diff --git a/roles/openshift_hosted_metrics/handlers/main.yml b/roles/openshift_hosted_metrics/handlers/main.yml index 69c5a1663..ce7688581 100644 --- a/roles/openshift_hosted_metrics/handlers/main.yml +++ b/roles/openshift_hosted_metrics/handlers/main.yml @@ -1,17 +1,12 @@ --- -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and diff --git a/roles/openshift_loadbalancer/defaults/main.yml b/roles/openshift_loadbalancer/defaults/main.yml index 6190383b6..3f6409233 100644 --- a/roles/openshift_loadbalancer/defaults/main.yml +++ b/roles/openshift_loadbalancer/defaults/main.yml @@ -1,4 +1,7 @@ --- +r_openshift_loadbalancer_firewall_enabled: True +r_openshift_loadbalancer_use_firewalld: False + haproxy_frontends: - name: main binds: @@ -12,3 +15,13 @@ haproxy_backends: - name: web01 address: 127.0.0.1:9000 opts: check + +r_openshift_loadbalancer_os_firewall_deny: [] +r_openshift_loadbalancer_os_firewall_allow: +- service: haproxy stats + port: "9000/tcp" +- service: haproxy balance + port: "{{ openshift_master_api_port | default(8443) }}/tcp" +- service: nuage mon + port: "{{ nuage_mon_rest_server_port | default(9443) }}/tcp" + cond: "{{ openshift_use_nuage | default(false) | bool }}" diff --git a/roles/openshift_loadbalancer/meta/main.yml b/roles/openshift_loadbalancer/meta/main.yml index 0dffb545f..073bdd94d 100644 --- a/roles/openshift_loadbalancer/meta/main.yml +++ b/roles/openshift_loadbalancer/meta/main.yml @@ -10,16 +10,6 @@ galaxy_info: versions: - 7 dependencies: +- role: lib_os_firewall - role: openshift_facts -- role: os_firewall - os_firewall_allow: - - service: haproxy stats - port: "9000/tcp" - - service: haproxy balance - port: "{{ openshift_master_api_port | default(8443) }}/tcp" -- role: os_firewall - os_firewall_allow: - - service: nuage mon - port: "{{ nuage_mon_rest_server_port | default(9443) }}/tcp" - when: openshift_use_nuage | default(false) | bool - role: openshift_repos diff --git a/roles/openshift_loadbalancer/tasks/firewall.yml b/roles/openshift_loadbalancer/tasks/firewall.yml new file mode 100644 index 000000000..7d6e8ff36 --- /dev/null +++ b/roles/openshift_loadbalancer/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_loadbalancer_firewall_enabled | bool and not r_openshift_loadbalancer_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_loadbalancer_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_loadbalancer_os_firewall_deny }}" + +- when: r_openshift_loadbalancer_firewall_enabled | bool and r_openshift_loadbalancer_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_loadbalancer_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_loadbalancer_os_firewall_deny }}" diff --git a/roles/openshift_loadbalancer/tasks/main.yml b/roles/openshift_loadbalancer/tasks/main.yml index 68bb4ace8..69b061fc5 100644 --- a/roles/openshift_loadbalancer/tasks/main.yml +++ b/roles/openshift_loadbalancer/tasks/main.yml @@ -1,4 +1,8 @@ --- +- name: setup firewall + include: firewall.yml + static: yes + - name: Install haproxy package: name=haproxy state=present when: not openshift.common.is_containerized | bool diff --git a/roles/openshift_logging/handlers/main.yml b/roles/openshift_logging/handlers/main.yml index 69c5a1663..ce7688581 100644 --- a/roles/openshift_logging/handlers/main.yml +++ b/roles/openshift_logging/handlers/main.yml @@ -1,17 +1,12 @@ --- -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and diff --git a/roles/openshift_logging/tasks/update_master_config.yaml b/roles/openshift_logging/tasks/update_master_config.yaml index 10f522b61..b96b8e29d 100644 --- a/roles/openshift_logging/tasks/update_master_config.yaml +++ b/roles/openshift_logging/tasks/update_master_config.yaml @@ -5,7 +5,6 @@ yaml_key: assetConfig.loggingPublicURL yaml_value: "https://{{ openshift_logging_kibana_hostname }}" notify: - - restart master - restart master api - restart master controllers tags: diff --git a/roles/openshift_logging_fluentd/tasks/main.yaml b/roles/openshift_logging_fluentd/tasks/main.yaml index 9dfc6fc86..74b4d7db4 100644 --- a/roles/openshift_logging_fluentd/tasks/main.yaml +++ b/roles/openshift_logging_fluentd/tasks/main.yaml @@ -1,7 +1,7 @@ --- - fail: msg: Only one Fluentd nodeselector key pair should be provided - when: "{{ openshift_logging_fluentd_nodeselector.keys() | count }} > 1" + when: openshift_logging_fluentd_nodeselector.keys() | count > 1 - fail: msg: Application logs destination is required diff --git a/roles/openshift_logging_kibana/tasks/main.yaml b/roles/openshift_logging_kibana/tasks/main.yaml index 62bc26e37..166f102f7 100644 --- a/roles/openshift_logging_kibana/tasks/main.yaml +++ b/roles/openshift_logging_kibana/tasks/main.yaml @@ -99,17 +99,17 @@ # TODO: set up these certs differently? - set_fact: kibana_key: "{{ lookup('file', openshift_logging_kibana_key) | b64encode }}" - when: "{{ openshift_logging_kibana_key | trim | length > 0 }}" + when: openshift_logging_kibana_key | trim | length > 0 changed_when: false - set_fact: kibana_cert: "{{ lookup('file', openshift_logging_kibana_cert) | b64encode }}" - when: "{{ openshift_logging_kibana_cert | trim | length > 0 }}" + when: openshift_logging_kibana_cert | trim | length > 0 changed_when: false - set_fact: kibana_ca: "{{ lookup('file', openshift_logging_kibana_ca) | b64encode }}" - when: "{{ openshift_logging_kibana_ca | trim | length > 0 }}" + when: openshift_logging_kibana_ca | trim | length > 0 changed_when: false - set_fact: diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml index 2d3ce5bcd..a4c178908 100644 --- a/roles/openshift_master/defaults/main.yml +++ b/roles/openshift_master/defaults/main.yml @@ -1,4 +1,21 @@ --- +r_openshift_master_firewall_enabled: True +r_openshift_master_use_firewalld: False + openshift_node_ips: [] r_openshift_master_clean_install: false r_openshift_master_etcd3_storage: false +r_openshift_master_os_firewall_enable: true +r_openshift_master_os_firewall_deny: [] +r_openshift_master_os_firewall_allow: +- service: api server https + port: "{{ openshift.master.api_port }}/tcp" +- service: api controllers https + port: "{{ openshift.master.controllers_port }}/tcp" +- service: skydns tcp + port: "{{ openshift.master.dns_port }}/tcp" +- service: skydns udp + port: "{{ openshift.master.dns_port }}/udp" +- service: etcd embedded + port: 4001/tcp + cond: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" diff --git a/roles/openshift_master/files/atomic-openshift-master.service b/roles/openshift_master/files/atomic-openshift-master.service deleted file mode 100644 index 02af4dd16..000000000 --- a/roles/openshift_master/files/atomic-openshift-master.service +++ /dev/null @@ -1,23 +0,0 @@ -[Unit] -Description=Atomic OpenShift Master -Documentation=https://github.com/openshift/origin -After=network-online.target -After=etcd.service -Before=atomic-openshift-node.service -Requires=network-online.target - -[Service] -Type=notify -EnvironmentFile=/etc/sysconfig/atomic-openshift-master -Environment=GOTRACEBACK=crash -ExecStart=/usr/bin/openshift start master --config=${CONFIG_FILE} $OPTIONS -LimitNOFILE=131072 -LimitCORE=infinity -WorkingDirectory=/var/lib/origin/ -SyslogIdentifier=atomic-openshift-master -Restart=always -RestartSec=5s - -[Install] -WantedBy=multi-user.target -WantedBy=atomic-openshift-node.service diff --git a/roles/openshift_master/files/origin-master.service b/roles/openshift_master/files/origin-master.service deleted file mode 100644 index cf79dda02..000000000 --- a/roles/openshift_master/files/origin-master.service +++ /dev/null @@ -1,23 +0,0 @@ -[Unit] -Description=Origin Master Service -Documentation=https://github.com/openshift/origin -After=network-online.target -After=etcd.service -Before=origin-node.service -Requires=network-online.target - -[Service] -Type=notify -EnvironmentFile=/etc/sysconfig/origin-master -Environment=GOTRACEBACK=crash -ExecStart=/usr/bin/openshift start master --config=${CONFIG_FILE} $OPTIONS -LimitNOFILE=131072 -LimitCORE=infinity -WorkingDirectory=/var/lib/origin/ -SyslogIdentifier=origin-master -Restart=always -RestartSec=5s - -[Install] -WantedBy=multi-user.target -WantedBy=origin-node.service diff --git a/roles/openshift_master/handlers/main.yml b/roles/openshift_master/handlers/main.yml index 69c5a1663..ce7688581 100644 --- a/roles/openshift_master/handlers/main.yml +++ b/roles/openshift_master/handlers/main.yml @@ -1,17 +1,12 @@ --- -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and diff --git a/roles/openshift_master/meta/main.yml b/roles/openshift_master/meta/main.yml index 907f25bc5..bd2383f61 100644 --- a/roles/openshift_master/meta/main.yml +++ b/roles/openshift_master/meta/main.yml @@ -13,6 +13,7 @@ galaxy_info: - cloud dependencies: - role: lib_openshift +- role: lib_os_firewall - role: openshift_master_facts - role: openshift_hosted_facts - role: openshift_master_certificates @@ -25,21 +26,6 @@ dependencies: - role: openshift_cloud_provider - role: openshift_builddefaults - role: openshift_buildoverrides -- role: os_firewall - os_firewall_allow: - - service: api server https - port: "{{ openshift.master.api_port }}/tcp" - - service: api controllers https - port: "{{ openshift.master.controllers_port }}/tcp" - - service: skydns tcp - port: "{{ openshift.master.dns_port }}/tcp" - - service: skydns udp - port: "{{ openshift.master.dns_port }}/udp" -- role: os_firewall - os_firewall_allow: - - service: etcd embedded - port: 4001/tcp - when: groups.oo_etcd_to_config | default([]) | length == 0 - role: nickhammond.logrotate - role: contiv contiv_role: netmaster diff --git a/roles/openshift_master/tasks/clean_systemd_units.yml b/roles/openshift_master/tasks/clean_systemd_units.yml new file mode 100644 index 000000000..590692c10 --- /dev/null +++ b/roles/openshift_master/tasks/clean_systemd_units.yml @@ -0,0 +1,5 @@ +--- + +- name: Disable master service + systemd: name={{ openshift.common.service_type }}-master state=stopped enabled=no masked=yes + ignore_errors: true diff --git a/roles/openshift_master/tasks/files b/roles/openshift_master/tasks/files deleted file mode 120000 index feb122881..000000000 --- a/roles/openshift_master/tasks/files +++ /dev/null @@ -1 +0,0 @@ -../files
\ No newline at end of file diff --git a/roles/openshift_master/tasks/firewall.yml b/roles/openshift_master/tasks/firewall.yml new file mode 100644 index 000000000..e51eeb56e --- /dev/null +++ b/roles/openshift_master/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_master_firewall_enabled | bool and not r_openshift_master_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_master_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_master_os_firewall_deny }}" + +- when: r_openshift_master_firewall_enabled | bool and r_openshift_master_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_master_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_master_os_firewall_deny }}" diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 1f182a25c..b1412c3d9 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -12,16 +12,20 @@ # HA Variable Validation - fail: msg: "openshift_master_cluster_method must be set to either 'native' or 'pacemaker' for multi-master installations" - when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method not in ["native", "pacemaker"])) + when: openshift.master.ha | bool and ((openshift.master.cluster_method is not defined) or (openshift.master.cluster_method is defined and openshift.master.cluster_method not in ["native", "pacemaker"])) - fail: msg: "'native' high availability is not supported for the requested OpenShift version" - when: openshift_master_ha | bool and openshift_master_cluster_method == "native" and not openshift.common.version_gte_3_1_or_1_1 | bool + when: openshift.master.ha | bool and openshift.master.cluster_method == "native" and not openshift.common.version_gte_3_1_or_1_1 | bool - fail: msg: "openshift_master_cluster_password must be set for multi-master installations" - when: openshift_master_ha | bool and openshift_master_cluster_method == "pacemaker" and (openshift_master_cluster_password is not defined or not openshift_master_cluster_password) + when: openshift.master.ha | bool and openshift.master.cluster_method == "pacemaker" and (openshift_master_cluster_password is not defined or not openshift_master_cluster_password) - fail: msg: "Pacemaker based HA is not supported at this time when used with containerized installs" - when: openshift_master_ha | bool and openshift_master_cluster_method == "pacemaker" and openshift.common.is_containerized | bool + when: openshift.master.ha | bool and openshift.master.cluster_method == "pacemaker" and openshift.common.is_containerized | bool + +- name: Open up firewall ports + include: firewall.yml + static: yes - name: Install Master package package: @@ -57,7 +61,6 @@ args: creates: "{{ openshift_master_policy }}" notify: - - restart master - restart master api - restart master controllers @@ -67,7 +70,6 @@ dest: "{{ openshift_master_scheduler_conf }}" backup: true notify: - - restart master - restart master api - restart master controllers @@ -146,6 +148,9 @@ local_facts: no_proxy_etcd_host_ips: "{{ openshift_no_proxy_etcd_host_ips }}" +- name: Remove the legacy master service if it exists + include: clean_systemd_units.yml + - name: Install the systemd units include: systemd_units.yml @@ -162,7 +167,6 @@ mode: 0600 when: openshift.master.session_auth_secrets is defined and openshift.master.session_encryption_secrets is defined notify: - - restart master - restart master api - set_fact: @@ -178,66 +182,18 @@ group: root mode: 0600 notify: - - restart master - restart master api - restart master controllers - include: set_loopback_context.yml when: openshift.common.version_gte_3_2_or_1_2 -# TODO: Master startup can fail when ec2 transparently reallocates the block -# storage, causing etcd writes to temporarily fail. Retry failures blindly just -# once to allow time for this transient condition to to resolve and for systemd -# to restart the master (which will eventually succeed). -# -# https://github.com/coreos/etcd/issues/3864 -# https://github.com/openshift/origin/issues/6065 -# https://github.com/openshift/origin/issues/6447 -- name: Start and enable master - systemd: - daemon_reload: yes - name: "{{ openshift.common.service_type }}-master" - enabled: yes - state: started - when: not openshift_master_ha | bool - register: start_result - until: not start_result | failed - retries: 1 - delay: 60 - notify: Verify API Server - -- name: Dump logs from master service if it failed - command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master - when: start_result | failed - -- name: Stop and disable non-HA master when running HA - systemd: - name: "{{ openshift.common.service_type }}-master" - enabled: no - state: stopped - when: openshift_master_ha | bool - register: task_result - failed_when: task_result|failed and 'could not' not in task_result.msg|lower - -- set_fact: - master_service_status_changed: "{{ start_result | changed }}" - when: not openshift_master_ha | bool - -- name: Mask master service - systemd: - name: "{{ openshift.common.service_type }}-master" - masked: yes - when: > - openshift_master_ha | bool and - openshift.master.cluster_method == 'native' and - not openshift.common.is_containerized | bool - - name: Start and enable master api on first master systemd: name: "{{ openshift.common.service_type }}-master-api" enabled: yes state: started - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] register: start_result until: not start_result | failed retries: 1 @@ -249,18 +205,18 @@ - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] - pause: seconds: 15 - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' + when: openshift.master.ha | bool and openshift.master.cluster_method == 'native' - name: Start and enable master api all masters systemd: name: "{{ openshift.common.service_type }}-master-api" enabled: yes state: started - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] register: start_result until: not start_result | failed retries: 1 @@ -272,7 +228,7 @@ - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] # A separate wait is required here for native HA since notifies will # be resolved after all tasks in the role. @@ -293,14 +249,14 @@ delay: 1 run_once: true changed_when: false - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and master_api_service_status_changed | bool + when: openshift.master.cluster_method == 'native' and master_api_service_status_changed | bool - name: Start and enable master controller on first master systemd: name: "{{ openshift.common.service_type }}-master-controllers" enabled: yes state: started - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] register: start_result until: not start_result | failed retries: 1 @@ -313,14 +269,14 @@ - name: Wait for master controller service to start on first master pause: seconds: 15 - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' + when: openshift.master.cluster_method == 'native' - name: Start and enable master controller on all masters systemd: name: "{{ openshift.common.service_type }}-master-controllers" enabled: yes state: started - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] + when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] register: start_result until: not start_result | failed retries: 1 @@ -332,11 +288,11 @@ - set_fact: master_controllers_service_status_changed: "{{ start_result | changed }}" - when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' + when: openshift.master.cluster_method == 'native' - name: Install cluster packages package: name=pcs state=present - when: openshift_master_ha | bool and openshift.master.cluster_method == 'pacemaker' + when: openshift.master.cluster_method == 'pacemaker' and not openshift.common.is_containerized | bool register: install_result @@ -345,7 +301,7 @@ name: pcsd enabled: yes state: started - when: openshift_master_ha | bool and openshift.master.cluster_method == 'pacemaker' + when: openshift.master.cluster_method == 'pacemaker' and not openshift.common.is_containerized | bool - name: Set the cluster user password diff --git a/roles/openshift_master/tasks/system_container.yml b/roles/openshift_master/tasks/system_container.yml index 9944682cc..8d343336f 100644 --- a/roles/openshift_master/tasks/system_container.yml +++ b/roles/openshift_master/tasks/system_container.yml @@ -10,14 +10,6 @@ atomic containers list --no-trunc -a -f container={{ openshift.common.service_type }}-master register: result -- name: Install or Update master system container - oc_atomic_container: - name: "{{ openshift.common.service_type }}-master" - image: "{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{{ openshift.master.master_system_image }}:{{ openshift_image_tag }}" - state: latest - when: - - not l_is_ha - # HA - name: Install or Update HA api master system container oc_atomic_container: @@ -26,15 +18,11 @@ state: latest values: - COMMAND=api - when: - - l_is_ha - name: Install or Update HA controller master system container oc_atomic_container: name: "{{ openshift.common.service_type }}-master-controllers" - image: "{{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{ openshift.master.master_system_image }}:{{ openshift_image_tag }}" + image: "{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{{ openshift.master.master_system_image }}:{{ openshift_image_tag }}" state: latest values: - COMMAND=controllers - when: - - l_is_ha diff --git a/roles/openshift_master/tasks/systemd_units.yml b/roles/openshift_master/tasks/systemd_units.yml index d71ad3459..723bdb0c4 100644 --- a/roles/openshift_master/tasks/systemd_units.yml +++ b/roles/openshift_master/tasks/systemd_units.yml @@ -22,34 +22,12 @@ changed_when: "'Downloaded newer image' in pull_result.stdout" when: openshift.common.is_containerized | bool and not openshift.common.is_master_system_container | bool -# workaround for missing systemd unit files -- name: "Create the {{ openshift.common.service_type }} systemd unit file" - template: - src: "master_docker/master.docker.service.j2" - dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master.service" - when: - - openshift.common.is_containerized | bool and (openshift.master.ha is not defined or not openshift.master.ha) | bool - - not openshift.common.is_master_system_container | bool - register: create_master_unit_file - -- name: "Install {{ openshift.common.service_type }} systemd unit file" - copy: - dest: "/etc/systemd/system/{{ openshift.common.service_type }}-master.service" - src: "{{ openshift.common.service_type }}-master.service" - register: create_master_unit_file - when: - - not openshift.common.is_containerized | bool - - (openshift.master.ha is not defined or not openshift.master.ha) | bool - -- command: systemctl daemon-reload - when: create_master_unit_file | changed - -- name: Create the ha systemd unit files for api and controller services +- name: Create the ha systemd unit files template: src: "{{ ha_svc_template_path }}/atomic-openshift-master-{{ item }}.service.j2" dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master-{{ item }}.service" when: - - openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + - openshift.master.cluster_method == "native" - not openshift.common.is_master_system_container | bool with_items: - api @@ -63,14 +41,14 @@ - name: Preserve Master API Proxy Config options command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master-api register: master_api_proxy - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" failed_when: false changed_when: false - name: Preserve Master API AWS options command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master-api register: master_api_aws - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" failed_when: false changed_when: false @@ -79,12 +57,12 @@ src: "{{ ha_svc_template_path }}/atomic-openshift-master-api.j2" dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api backup: true - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" notify: - restart master api - name: Restore Master API Proxy Config Options - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" and master_api_proxy.rc == 0 and 'http_proxy' not in openshift.common and 'https_proxy' not in openshift.common lineinfile: dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api @@ -92,7 +70,7 @@ with_items: "{{ master_api_proxy.stdout_lines | default([]) }}" - name: Restore Master API AWS Options - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" and master_api_aws.rc == 0 and not (openshift_cloudprovider_kind is defined and openshift_cloudprovider_kind == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_secret_key is defined) lineinfile: @@ -104,14 +82,14 @@ - name: Preserve Master Controllers Proxy Config options command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers register: master_controllers_proxy - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" failed_when: false changed_when: false - name: Preserve Master Controllers AWS options command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers register: master_controllers_aws - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" failed_when: false changed_when: false @@ -120,7 +98,7 @@ src: "{{ ha_svc_template_path }}/atomic-openshift-master-controllers.j2" dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers backup: true - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" notify: - restart master controllers @@ -129,7 +107,7 @@ dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers line: "{{ item }}" with_items: "{{ master_controllers_proxy.stdout_lines | default([]) }}" - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" and master_controllers_proxy.rc == 0 and 'http_proxy' not in openshift.common and 'https_proxy' not in openshift.common - name: Restore Master Controllers AWS Options @@ -137,39 +115,6 @@ dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers line: "{{ item }}" with_items: "{{ master_controllers_aws.stdout_lines | default([]) }}" - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native" + when: openshift.master.cluster_method == "native" and master_controllers_aws.rc == 0 and not (openshift_cloudprovider_kind is defined and openshift_cloudprovider_kind == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_secret_key is defined) - -- name: Install Master docker service file - template: - dest: "/etc/systemd/system/{{ openshift.common.service_type }}-master.service" - src: master_docker/master.docker.service.j2 - register: install_result - when: openshift.common.is_containerized | bool and openshift.master.ha is defined and not openshift.master.ha | bool and not openshift.common.is_master_system_container | bool - -- name: Preserve Master Proxy Config options - command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master - register: master_proxy_result - failed_when: false - changed_when: false - -- set_fact: - master_proxy: "{{ master_proxy_result.stdout_lines | default([]) }}" - -- name: Preserve Master AWS options - command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master - register: master_aws_result - failed_when: false - changed_when: false - -- set_fact: - master_aws: "{{ master_aws_result.stdout_lines | default([]) }}" - -- name: Create the master service env file - template: - src: "atomic-openshift-master.j2" - dest: /etc/sysconfig/{{ openshift.common.service_type }}-master - backup: true - notify: - - restart master diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index 7964bbb48..c14579435 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -47,11 +47,10 @@ assetConfig: {% if openshift.master.audit_config | default(none) is not none and openshift.common.version_gte_3_2_or_1_2 | bool %} auditConfig:{{ openshift.master.audit_config | to_padded_yaml(level=1) }} {% endif %} -{% if openshift_master_ha | bool %} -controllerLeaseTTL: {{ openshift.master.controller_lease_ttl | default('30') }} -{% endif %} {% if openshift.common.version_gte_3_3_or_1_3 | bool %} controllerConfig: + election: + lockName: openshift-master-controllers serviceServingCert: signer: certFile: service-signer.crt diff --git a/roles/openshift_master/templates/master_docker/master.docker.service.j2 b/roles/openshift_master/templates/master_docker/master.docker.service.j2 deleted file mode 100644 index 31c1dfc33..000000000 --- a/roles/openshift_master/templates/master_docker/master.docker.service.j2 +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -After={{ openshift.docker.service_name }}.service -Requires={{ openshift.docker.service_name }}.service -PartOf={{ openshift.docker.service_name }}.service -After=etcd_container.service -Wants=etcd_container.service - -[Service] -EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master -ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-master -ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/log:/var/log -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {% if openshift_cloudprovider_kind | default('') != '' -%} -v {{ openshift.common.config_base }}/cloudprovider:{{ openshift.common.config_base}}/cloudprovider {% endif -%} -v /etc/pki:/etc/pki:ro {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS -ExecStartPost=/usr/bin/sleep 10 -ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master -Restart=always -RestartSec=5s - -[Install] -WantedBy={{ openshift.docker.service_name }}.service diff --git a/roles/openshift_master/vars/main.yml b/roles/openshift_master/vars/main.yml index 7745d014f..cf39b73f6 100644 --- a/roles/openshift_master/vars/main.yml +++ b/roles/openshift_master/vars/main.yml @@ -19,5 +19,4 @@ openshift_master_valid_grant_methods: - prompt - deny -l_is_ha: "{{ openshift.master.ha is defined and openshift.master.ha | bool }}" openshift_master_is_scaleup_host: False diff --git a/roles/openshift_master_facts/tasks/main.yml b/roles/openshift_master_facts/tasks/main.yml index ef8dcd5fd..fa228af2a 100644 --- a/roles/openshift_master_facts/tasks/main.yml +++ b/roles/openshift_master_facts/tasks/main.yml @@ -32,7 +32,7 @@ openshift_facts: role: master local_facts: - cluster_method: "{{ openshift_master_cluster_method | default(None) }}" + cluster_method: "{{ openshift_master_cluster_method | default('native') }}" cluster_hostname: "{{ openshift_master_cluster_hostname | default(None) }}" cluster_public_hostname: "{{ openshift_master_cluster_public_hostname | default(None) }}" debug_level: "{{ openshift_master_debug_level | default(openshift.common.debug_level) }}" diff --git a/roles/openshift_metrics/handlers/main.yml b/roles/openshift_metrics/handlers/main.yml index 69c5a1663..ce7688581 100644 --- a/roles/openshift_metrics/handlers/main.yml +++ b/roles/openshift_metrics/handlers/main.yml @@ -1,17 +1,12 @@ --- -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) - notify: Verify API Server - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted - when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server # Using curl here since the uri module requires python-httplib2 and diff --git a/roles/openshift_metrics/tasks/update_master_config.yaml b/roles/openshift_metrics/tasks/update_master_config.yaml index be1e3c3a0..5059d8d94 100644 --- a/roles/openshift_metrics/tasks/update_master_config.yaml +++ b/roles/openshift_metrics/tasks/update_master_config.yaml @@ -5,7 +5,6 @@ yaml_key: assetConfig.metricsPublicURL yaml_value: "https://{{ openshift_metrics_hawkular_hostname}}/hawkular/metrics" notify: - - restart master - restart master api - restart master controllers tags: diff --git a/roles/openshift_node/defaults/main.yml b/roles/openshift_node/defaults/main.yml index 47073ee0f..973b3a619 100644 --- a/roles/openshift_node/defaults/main.yml +++ b/roles/openshift_node/defaults/main.yml @@ -1,5 +1,8 @@ --- -os_firewall_allow: +r_openshift_node_firewall_enabled: True +r_openshift_node_use_firewalld: False +r_openshift_node_os_firewall_deny: [] +r_openshift_node_os_firewall_allow: - service: Kubernetes kubelet port: 10250/tcp - service: http @@ -8,7 +11,13 @@ os_firewall_allow: port: 443/tcp - service: OpenShift OVS sdn port: 4789/udp - when: openshift.common.use_openshift_sdn | default(true) | bool + cond: openshift.common.use_openshift_sdn | default(true) | bool - service: Calico BGP Port port: 179/tcp - when: openshift.common.use_calico | bool + cond: "{{ openshift.common.use_calico | bool }}" +- service: Kubernetes service NodePort TCP + port: "{{ openshift_node_port_range | default('') }}/tcp" + cond: "{{ openshift_node_port_range is defined }}" +- service: Kubernetes service NodePort UDP + port: "{{ openshift_node_port_range | default('') }}/udp" + cond: "{{ openshift_node_port_range is defined }}" diff --git a/roles/openshift_node/meta/main.yml b/roles/openshift_node/meta/main.yml index 4fb841add..06373de04 100644 --- a/roles/openshift_node/meta/main.yml +++ b/roles/openshift_node/meta/main.yml @@ -14,36 +14,11 @@ galaxy_info: dependencies: - role: openshift_node_facts - role: lib_openshift +- role: lib_os_firewall - role: openshift_common - role: openshift_clock - role: openshift_docker - role: openshift_node_certificates - role: openshift_cloud_provider -- role: os_firewall - os_firewall_allow: - - service: Kubernetes kubelet - port: 10250/tcp - - service: http - port: 80/tcp - - service: https - port: 443/tcp -- role: os_firewall - os_firewall_allow: - - service: OpenShift OVS sdn - port: 4789/udp - when: openshift.common.use_openshift_sdn | default(true) | bool -- role: os_firewall - os_firewall_allow: - - service: Calico BGP Port - port: 179/tcp - when: openshift.common.use_calico | bool - -- role: os_firewall - os_firewall_allow: - - service: Kubernetes service NodePort TCP - port: "{{ openshift_node_port_range | default('') }}/tcp" - - service: Kubernetes service NodePort UDP - port: "{{ openshift_node_port_range | default('') }}/udp" - when: openshift_node_port_range is defined - role: openshift_node_dnsmasq when: openshift.common.use_dnsmasq | bool diff --git a/roles/openshift_node/tasks/firewall.yml b/roles/openshift_node/tasks/firewall.yml new file mode 100644 index 000000000..255aa886a --- /dev/null +++ b/roles/openshift_node/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_node_firewall_enabled | bool and not r_openshift_node_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_node_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_node_os_firewall_deny }}" + +- when: r_openshift_node_firewall_enabled | bool and r_openshift_node_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_node_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_node_os_firewall_deny }}" diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index ca4fef360..3353a22e3 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -6,6 +6,38 @@ - (not ansible_selinux or ansible_selinux.status != 'enabled') and deployment_type in ['enterprise', 'online', 'atomic-enterprise', 'openshift-enterprise'] - not openshift_docker_use_crio | default(false) +- name: setup firewall + include: firewall.yml + static: yes + +- name: Set node facts + openshift_facts: + role: "{{ item.role }}" + local_facts: "{{ item.local_facts }}" + with_items: + # Reset node labels to an empty dictionary. + - role: node + local_facts: + labels: {} + - role: node + local_facts: + annotations: "{{ openshift_node_annotations | default(none) }}" + debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}" + iptables_sync_period: "{{ openshift_node_iptables_sync_period | default(None) }}" + kubelet_args: "{{ openshift_node_kubelet_args | default(None) }}" + labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}" + registry_url: "{{ oreg_url_node | default(oreg_url) | default(None) }}" + schedulable: "{{ openshift_schedulable | default(openshift_scheduleable) | default(None) }}" + sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}" + storage_plugin_deps: "{{ osn_storage_plugin_deps | default(None) }}" + set_node_ip: "{{ openshift_set_node_ip | default(None) }}" + node_image: "{{ osn_image | default(None) }}" + ovs_image: "{{ osn_ovs_image | default(None) }}" + proxy_mode: "{{ openshift_node_proxy_mode | default('iptables') }}" + local_quota_per_fsgroup: "{{ openshift_node_local_quota_per_fsgroup | default(None) }}" + dns_ip: "{{ openshift_dns_ip | default(none) | get_dns_ip(hostvars[inventory_hostname])}}" + env_vars: "{{ openshift_node_env_vars | default(None) }}" + # https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory - name: Check for swap usage command: grep "^[^#].*swap" /etc/fstab diff --git a/roles/openshift_node_upgrade/tasks/main.yml b/roles/openshift_node_upgrade/tasks/main.yml index f984a04b2..bc092c26c 100644 --- a/roles/openshift_node_upgrade/tasks/main.yml +++ b/roles/openshift_node_upgrade/tasks/main.yml @@ -24,7 +24,6 @@ name: "{{ item }}" state: stopped with_items: - - "{{ openshift.common.service_type }}-master" - "{{ openshift.common.service_type }}-master-controllers" - "{{ openshift.common.service_type }}-master-api" - etcd_container @@ -81,7 +80,6 @@ with_items: - etcd_container - openvswitch - - "{{ openshift.common.service_type }}-master" - "{{ openshift.common.service_type }}-master-api" - "{{ openshift.common.service_type }}-master-controllers" - "{{ openshift.common.service_type }}-node" diff --git a/roles/openshift_node_upgrade/tasks/restart.yml b/roles/openshift_node_upgrade/tasks/restart.yml index f228b6e08..a4fa51172 100644 --- a/roles/openshift_node_upgrade/tasks/restart.yml +++ b/roles/openshift_node_upgrade/tasks/restart.yml @@ -31,7 +31,6 @@ with_items: - etcd_container - openvswitch - - "{{ openshift.common.service_type }}-master" - "{{ openshift.common.service_type }}-master-api" - "{{ openshift.common.service_type }}-master-controllers" - "{{ openshift.common.service_type }}-node" diff --git a/roles/openshift_provisioners/tasks/install_efs.yaml b/roles/openshift_provisioners/tasks/install_efs.yaml index b53b6afa1..4a6e00513 100644 --- a/roles/openshift_provisioners/tasks/install_efs.yaml +++ b/roles/openshift_provisioners/tasks/install_efs.yaml @@ -67,4 +67,4 @@ register: efs_output failed_when: efs_output.rc == 1 and 'exists' not in efs_output.stderr check_mode: no - when: efs_anyuid.stdout.find("system:serviceaccount:{{openshift_provisioners_project}}:provisioners-efs") == -1 + when: efs_anyuid.stdout.find("system:serviceaccount:" + openshift_provisioners_project + ":provisioners-efs") == -1 diff --git a/roles/openshift_service_catalog/tasks/wire_aggregator.yml b/roles/openshift_service_catalog/tasks/wire_aggregator.yml index d5291a99a..1c788470a 100644 --- a/roles/openshift_service_catalog/tasks/wire_aggregator.yml +++ b/roles/openshift_service_catalog/tasks/wire_aggregator.yml @@ -156,24 +156,16 @@ register: yedit_output #restart master serially here -- name: restart master - systemd: name={{ openshift.common.service_type }}-master state=restarted - when: - - yedit_output.changed - - openshift.master.ha is not defined or not openshift.master.ha | bool - - name: restart master api systemd: name={{ openshift.common.service_type }}-master-api state=restarted when: - yedit_output.changed - - openshift.master.ha is defined and openshift.master.ha | bool - openshift.master.cluster_method == 'native' - name: restart master controllers systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted when: - yedit_output.changed - - openshift.master.ha is defined and openshift.master.ha | bool - openshift.master.cluster_method == 'native' - name: Verify API Server diff --git a/roles/openshift_storage_nfs/defaults/main.yml b/roles/openshift_storage_nfs/defaults/main.yml index 7f3c054e7..4a2bc6141 100644 --- a/roles/openshift_storage_nfs/defaults/main.yml +++ b/roles/openshift_storage_nfs/defaults/main.yml @@ -1,4 +1,12 @@ --- +r_openshift_storage_nfs_firewall_enabled: True +r_openshift_storage_nfs_use_firewalld: False + +r_openshift_storage_nfs_os_firewall_deny: [] +r_openshift_storage_nfs_os_firewall_allow: +- service: nfs + port: "2049/tcp" + openshift: hosted: registry: diff --git a/roles/openshift_storage_nfs/meta/main.yml b/roles/openshift_storage_nfs/meta/main.yml index 62e38bd8c..b360d0658 100644 --- a/roles/openshift_storage_nfs/meta/main.yml +++ b/roles/openshift_storage_nfs/meta/main.yml @@ -10,9 +10,6 @@ galaxy_info: versions: - 7 dependencies: -- role: os_firewall - os_firewall_allow: - - service: nfs - port: "2049/tcp" +- role: lib_os_firewall - role: openshift_hosted_facts - role: openshift_repos diff --git a/roles/openshift_storage_nfs/tasks/firewall.yml b/roles/openshift_storage_nfs/tasks/firewall.yml new file mode 100644 index 000000000..c1c318ff4 --- /dev/null +++ b/roles/openshift_storage_nfs/tasks/firewall.yml @@ -0,0 +1,40 @@ +--- +- when: r_openshift_storage_nfs_firewall_enabled | bool and not r_openshift_storage_nfs_use_firewalld | bool + block: + - name: Add iptables allow rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: add + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_storage_nfs_os_firewall_allow }}" + + - name: Remove iptables rules + os_firewall_manage_iptables: + name: "{{ item.service }}" + action: remove + protocol: "{{ item.port.split('/')[1] }}" + port: "{{ item.port.split('/')[0] }}" + when: item.cond | default(True) + with_items: "{{ r_openshift_storage_nfs_os_firewall_deny }}" + +- when: r_openshift_storage_nfs_firewall_enabled | bool and r_openshift_storage_nfs_use_firewalld | bool + block: + - name: Add firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: enabled + when: item.cond | default(True) + with_items: "{{ r_openshift_storage_nfs_os_firewall_allow }}" + + - name: Remove firewalld allow rules + firewalld: + port: "{{ item.port }}" + permanent: true + immediate: true + state: disabled + when: item.cond | default(True) + with_items: "{{ r_openshift_storage_nfs_os_firewall_deny }}" diff --git a/roles/openshift_storage_nfs/tasks/main.yml b/roles/openshift_storage_nfs/tasks/main.yml index 019ada2fb..51f8f4e0e 100644 --- a/roles/openshift_storage_nfs/tasks/main.yml +++ b/roles/openshift_storage_nfs/tasks/main.yml @@ -1,4 +1,8 @@ --- +- name: setup firewall + include: firewall.yml + static: yes + - name: Install nfs-utils package: name=nfs-utils state=present diff --git a/roles/os_firewall/README.md b/roles/os_firewall/README.md index e7ef544f4..be0b8291a 100644 --- a/roles/os_firewall/README.md +++ b/roles/os_firewall/README.md @@ -1,8 +1,8 @@ OS Firewall =========== -OS Firewall manages firewalld and iptables firewall settings for a minimal use -case (Adding/Removing rules based on protocol and port number). +OS Firewall manages firewalld and iptables installation. +case. Note: firewalld is not supported on Atomic Host https://bugzilla.redhat.com/show_bug.cgi?id=1403331 @@ -18,8 +18,6 @@ Role Variables | Name | Default | | |---------------------------|---------|----------------------------------------| | os_firewall_use_firewalld | False | If false, use iptables | -| os_firewall_allow | [] | List of service,port mappings to allow | -| os_firewall_deny | [] | List of service, port mappings to deny | Dependencies ------------ @@ -29,34 +27,27 @@ None. Example Playbook ---------------- -Use iptables and open tcp ports 80 and 443: +Use iptables: ``` --- - hosts: servers - vars: - os_firewall_use_firewalld: false - os_firewall_allow: - - service: httpd - port: 80/tcp - - service: https - port: 443/tcp - roles: - - os_firewall + task: + - include_role: + name: os_firewall + vars: + os_firewall_use_firewalld: false ``` -Use firewalld and open tcp port 443 and close previously open tcp port 80: +Use firewalld: ``` --- - hosts: servers vars: - os_firewall_allow: - - service: https - port: 443/tcp - os_firewall_deny: - - service: httpd - port: 80/tcp - roles: - - os_firewall + tasks: + - include_role: + name: os_firewall + vars: + os_firewall_use_firewalld: true ``` License diff --git a/roles/os_firewall/defaults/main.yml b/roles/os_firewall/defaults/main.yml index 01859e5fc..f96a80f1c 100644 --- a/roles/os_firewall/defaults/main.yml +++ b/roles/os_firewall/defaults/main.yml @@ -3,5 +3,3 @@ os_firewall_enabled: True # firewalld is not supported on Atomic Host # https://bugzilla.redhat.com/show_bug.cgi?id=1403331 os_firewall_use_firewalld: "{{ False }}" -os_firewall_allow: [] -os_firewall_deny: [] diff --git a/roles/os_firewall/tasks/firewall/firewalld.yml b/roles/os_firewall/tasks/firewall/firewalld.yml index 509655b0c..2cc7af478 100644 --- a/roles/os_firewall/tasks/firewall/firewalld.yml +++ b/roles/os_firewall/tasks/firewall/firewalld.yml @@ -49,19 +49,3 @@ until: pkaction.rc == 0 retries: 6 delay: 10 - -- name: Add firewalld allow rules - firewalld: - port: "{{ item.port }}" - permanent: true - immediate: true - state: enabled - with_items: "{{ os_firewall_allow }}" - -- name: Remove firewalld allow rules - firewalld: - port: "{{ item.port }}" - permanent: true - immediate: true - state: disabled - with_items: "{{ os_firewall_deny }}" diff --git a/roles/os_firewall/tasks/firewall/iptables.yml b/roles/os_firewall/tasks/firewall/iptables.yml index 55f2fc471..ccb3c4713 100644 --- a/roles/os_firewall/tasks/firewall/iptables.yml +++ b/roles/os_firewall/tasks/firewall/iptables.yml @@ -33,19 +33,3 @@ - name: need to pause here, otherwise the iptables service starting can sometimes cause ssh to fail pause: seconds=10 when: result | changed - -- name: Add iptables allow rules - os_firewall_manage_iptables: - name: "{{ item.service }}" - action: add - protocol: "{{ item.port.split('/')[1] }}" - port: "{{ item.port.split('/')[0] }}" - with_items: "{{ os_firewall_allow }}" - -- name: Remove iptables rules - os_firewall_manage_iptables: - name: "{{ item.service }}" - action: remove - protocol: "{{ item.port.split('/')[1] }}" - port: "{{ item.port.split('/')[0] }}" - with_items: "{{ os_firewall_deny }}" |