diff options
81 files changed, 1012 insertions, 596 deletions
diff --git a/README_libvirt.md b/README_libvirt.md index c523d83fb..1661681a0 100644 --- a/README_libvirt.md +++ b/README_libvirt.md @@ -15,7 +15,7 @@ Install dependencies 3. Install [ebtables](http://ebtables.netfilter.org/) 4. Install [qemu and qemu-system-x86](http://wiki.qemu.org/Main_Page) 5. Install [libvirt-python and libvirt](http://libvirt.org/) -6. Install [genisoimage](http://cdrkit.org/) +6. Install [genisoimage](http://cdrkit.org/) or [mkisofs](http://cdrtools.sourceforge.net/private/cdrecord.html) 7. Enable and start the libvirt daemon, e.g: - `systemctl enable libvirtd` - `systemctl start libvirtd` @@ -23,6 +23,7 @@ Install dependencies 9. Check that your `$HOME` is accessible to the qemu user² 10. Configure dns resolution on the host³ 11. Install libselinux-python +12. Ensure you have an SSH private and public keypair at `~/.ssh/id_rsa` and `~/.ssh/id_rsa.pub`⁴ #### ¹ Depending on your distribution, libvirt access may be denied by default or may require a password at each access. @@ -103,6 +104,11 @@ sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf server=/example.com/192.168.55.1 ``` +#### ⁴ Private and public keypair in ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub + +This playbook uses SSH keys to communicate with the libvirt-driven virtual machines. At this time the names of those keys are fixed and cannot be changed. + + Test The Setup -------------- diff --git a/filter_plugins/oo_filters.py b/filter_plugins/oo_filters.py index c6d0e69eb..36a90a870 100644 --- a/filter_plugins/oo_filters.py +++ b/filter_plugins/oo_filters.py @@ -194,10 +194,10 @@ def oo_select_keys_from_list(data, keys): """ if not isinstance(data, list): - raise errors.AnsibleFilterError("|failed expects to filter on a list") + raise errors.AnsibleFilterError("|oo_select_keys_from_list failed expects to filter on a list") if not isinstance(keys, list): - raise errors.AnsibleFilterError("|failed expects first param is a list") + raise errors.AnsibleFilterError("|oo_select_keys_from_list failed expects first param is a list") # Gather up the values for the list of keys passed in retval = [oo_select_keys(item, keys) for item in data] @@ -213,10 +213,10 @@ def oo_select_keys(data, keys): """ if not isinstance(data, Mapping): - raise errors.AnsibleFilterError("|failed expects to filter on a dict or object") + raise errors.AnsibleFilterError("|oo_select_keys failed expects to filter on a dict or object") if not isinstance(keys, list): - raise errors.AnsibleFilterError("|failed expects first param is a list") + raise errors.AnsibleFilterError("|oo_select_keys failed expects first param is a list") # Gather up the values for the list of keys passed in retval = [data[key] for key in keys if key in data] diff --git a/inventory/byo/hosts.byo.glusterfs.external.example b/inventory/byo/hosts.byo.glusterfs.external.example index 628d3a3f7..5a284ce97 100644 --- a/inventory/byo/hosts.byo.glusterfs.external.example +++ b/inventory/byo/hosts.byo.glusterfs.external.example @@ -31,13 +31,13 @@ openshift_storage_glusterfs_is_native=False openshift_storage_glusterfs_heketi_url=172.0.0.1 [masters] -master node=True storage=True master=True +master [nodes] -master node=True storage=True master=True openshift_schedulable=False -node0 node=True openshift_schedulable=True -node1 node=True openshift_schedulable=True -node2 node=True openshift_schedulable=True +master openshift_schedulable=False +node0 openshift_schedulable=True +node1 openshift_schedulable=True +node2 openshift_schedulable=True # Specify the glusterfs group, which contains the nodes of the external # GlusterFS cluster. At a minimum, each node must have "glusterfs_hostname" diff --git a/inventory/byo/hosts.byo.glusterfs.mixed.example b/inventory/byo/hosts.byo.glusterfs.mixed.example index fd47cb9d5..d16df6470 100644 --- a/inventory/byo/hosts.byo.glusterfs.mixed.example +++ b/inventory/byo/hosts.byo.glusterfs.mixed.example @@ -34,13 +34,13 @@ openshift_storage_glusterfs_heketi_is_native=True openshift_storage_glusterfs_heketi_executor=ssh openshift_storage_glusterfs_heketi_ssh_keyfile=/root/id_rsa [masters] -master node=True storage=True master=True +master [nodes] -master node=True storage=True master=True openshift_schedulable=False -node0 node=True openshift_schedulable=True -node1 node=True openshift_schedulable=True -node2 node=True openshift_schedulable=True +master openshift_schedulable=False +node0 openshift_schedulable=True +node1 openshift_schedulable=True +node2 openshift_schedulable=True # Specify the glusterfs group, which contains the nodes of the external # GlusterFS cluster. At a minimum, each node must have "glusterfs_hostname" diff --git a/inventory/byo/hosts.byo.glusterfs.native.example b/inventory/byo/hosts.byo.glusterfs.native.example index a3e2570c9..c1a1f6f84 100644 --- a/inventory/byo/hosts.byo.glusterfs.native.example +++ b/inventory/byo/hosts.byo.glusterfs.native.example @@ -24,15 +24,15 @@ ansible_ssh_user=root openshift_deployment_type=origin [masters] -master node=True storage=True master=True +master [nodes] -master node=True storage=True master=True openshift_schedulable=False +master openshift_schedulable=False # A hosted registry, by default, will only be deployed on nodes labeled # "region=infra". -node0 node=True openshift_schedulable=True -node1 node=True openshift_schedulable=True -node2 node=True openshift_schedulable=True +node0 openshift_schedulable=True +node1 openshift_schedulable=True +node2 openshift_schedulable=True # Specify the glusterfs group, which contains the nodes that will host # GlusterFS storage pods. At a minimum, each node must have a diff --git a/inventory/byo/hosts.byo.glusterfs.registry-only.example b/inventory/byo/hosts.byo.glusterfs.registry-only.example index 999518abe..31a85ee42 100644 --- a/inventory/byo/hosts.byo.glusterfs.registry-only.example +++ b/inventory/byo/hosts.byo.glusterfs.registry-only.example @@ -30,15 +30,15 @@ openshift_deployment_type=origin openshift_hosted_registry_storage_kind=glusterfs [masters] -master node=True storage=True master=True +master [nodes] -master node=True storage=True master=True openshift_schedulable=False +master openshift_schedulable=False # A hosted registry, by default, will only be deployed on nodes labeled # "region=infra". -node0 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True -node1 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True -node2 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node0 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node1 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node2 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True # Specify the glusterfs group, which contains the nodes that will host # GlusterFS storage pods. At a minimum, each node must have a diff --git a/inventory/byo/hosts.byo.glusterfs.storage-and-registry.example b/inventory/byo/hosts.byo.glusterfs.storage-and-registry.example index 1df79301a..54bd89ddc 100644 --- a/inventory/byo/hosts.byo.glusterfs.storage-and-registry.example +++ b/inventory/byo/hosts.byo.glusterfs.storage-and-registry.example @@ -31,20 +31,20 @@ openshift_deployment_type=origin openshift_hosted_registry_storage_kind=glusterfs [masters] -master node=True storage=True master=True +master [nodes] -master node=True storage=True master=True openshift_schedulable=False +master openshift_schedulable=False # It is recommended to not use a single cluster for both general and registry # storage, so two three-node clusters will be required. -node0 node=True openshift_schedulable=True -node1 node=True openshift_schedulable=True -node2 node=True openshift_schedulable=True +node0 openshift_schedulable=True +node1 openshift_schedulable=True +node2 openshift_schedulable=True # A hosted registry, by default, will only be deployed on nodes labeled # "region=infra". -node3 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True -node4 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True -node5 node=True openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node3 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node4 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True +node5 openshift_node_labels="{'region': 'infra'}" openshift_schedulable=True # Specify the glusterfs group, which contains the nodes that will host # GlusterFS storage pods. At a minimum, each node must have a diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 6e53b4fd9..de7493f71 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -156,8 +156,9 @@ openshift_release=v3.6 # modify image streams to point at that registry by setting the following to true #openshift_examples_modify_imagestreams=true -# Origin copr repo +# OpenShift repository configuration #openshift_additional_repos=[{'id': 'openshift-origin-copr', 'name': 'OpenShift Origin COPR', 'baseurl': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/epel-7-$basearch/', 'enabled': 1, 'gpgcheck': 1, 'gpgkey': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/pubkey.gpg'}] +#openshift_repos_enable_testing=false # htpasswd auth openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}] @@ -882,6 +883,14 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 # where as this would not # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 +# +# Multiple data migrations take place and if they fail they will fail the upgrade +# You may wish to disable these or make them non fatal +# +# openshift_upgrade_pre_storage_migration_enabled=true +# openshift_upgrade_pre_storage_migration_fatal==true +# openshift_upgrade_post_storage_migration_enabled=true +# openshift_upgrade_post_storage_migration_fatal==false # host group for masters [masters] diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index e3e9220fc..62a364e0d 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -155,8 +155,9 @@ openshift_release=v3.6 # modify image streams to point at that registry by setting the following to true #openshift_examples_modify_imagestreams=true -# Additional yum repos to install +# OpenShift repository configuration #openshift_additional_repos=[{'id': 'ose-devel', 'name': 'ose-devel', 'baseurl': 'http://example.com/puddle/build/AtomicOpenShift/3.1/latest/RH7-RHOSE-3.0/$basearch/os', 'enabled': 1, 'gpgcheck': 0}] +#openshift_repos_enable_testing=false # htpasswd auth openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}] @@ -878,6 +879,14 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 # where as this would not # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 +# +# Multiple data migrations take place and if they fail they will fail the upgrade +# You may wish to disable these or make them non fatal +# +# openshift_upgrade_pre_storage_migration_enabled=true +# openshift_upgrade_pre_storage_migration_fatal==true +# openshift_upgrade_post_storage_migration_enabled=true +# openshift_upgrade_post_storage_migration_fatal==false # host group for masters [masters] diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 2b2f10aee..695dc3140 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -12,6 +12,12 @@ command: > {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig migrate storage --include=* --confirm + register: l_pb_upgrade_control_plane_pre_upgrade_storage + when: openshift_upgrade_pre_storage_migration_enabled | default(true,true) | bool + failed_when: + - openshift_upgrade_pre_storage_migration_enabled | default(true,true) | bool + - l_pb_upgrade_control_plane_pre_upgrade_storage.rc != 0 + - openshift_upgrade_pre_storage_migration_fatal | default(true,true) | bool # If facts cache were for some reason deleted, this fact may not be set, and if not set # it will always default to true. This causes problems for the etcd data dir fact detection @@ -140,16 +146,21 @@ - include: "{{ openshift_master_upgrade_post_hook }}" when: openshift_master_upgrade_post_hook is defined - - set_fact: - master_update_complete: True - -- name: Post master upgrade - Upgrade clusterpolicies storage - hosts: oo_first_master - tasks: - - name: Upgrade clusterpolicies storage + - name: Post master upgrade - Upgrade clusterpolicies storage command: > {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig migrate storage --include=clusterpolicies --confirm + register: l_pb_upgrade_control_plane_post_upgrade_storage + when: openshift_upgrade_post_storage_migration_enabled | default(true,true) | bool + failed_when: + - openshift_upgrade_post_storage_migration_enabled | default(true,true) | bool + - l_pb_upgrade_control_plane_post_upgrade_storage.rc != 0 + - openshift_upgrade_post_storage_migration_fatal | default(false,true) | bool + run_once: true + delegate_to: oo_first_master + + - set_fact: + master_update_complete: True ############################################################################## # Gate on master update complete @@ -230,11 +241,17 @@ - reconcile_scc_result.rc == 0 run_once: true - - name: Upgrade job storage + - name: Migrate storage post policy reconciliation command: > {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig migrate storage --include=* --confirm run_once: true + register: l_pb_upgrade_control_plane_post_upgrade_storage + when: openshift_upgrade_post_storage_migration_enabled | default(true,true) | bool + failed_when: + - openshift_upgrade_post_storage_migration_enabled | default(true,true) | bool + - l_pb_upgrade_control_plane_post_upgrade_storage.rc != 0 + - openshift_upgrade_post_storage_migration_fatal | default(false,true) | bool - set_fact: reconcile_complete: True diff --git a/playbooks/common/openshift-glusterfs/config.yml b/playbooks/common/openshift-glusterfs/config.yml index 1efdfb336..edc15a3f2 100644 --- a/playbooks/common/openshift-glusterfs/config.yml +++ b/playbooks/common/openshift-glusterfs/config.yml @@ -1,6 +1,6 @@ --- -- name: Open firewall ports for GlusterFS - hosts: oo_glusterfs_to_config +- name: Open firewall ports for GlusterFS nodes + hosts: glusterfs vars: os_firewall_allow: - service: glusterfs_sshd @@ -14,7 +14,24 @@ roles: - role: os_firewall when: - - openshift_storage_glusterfs_is_native | default(True) + - openshift_storage_glusterfs_is_native | default(True) | bool + +- name: Open firewall ports for GlusterFS registry nodes + hosts: glusterfs_registry + vars: + os_firewall_allow: + - service: glusterfs_sshd + port: "2222/tcp" + - service: glusterfs_daemon + port: "24007/tcp" + - service: glusterfs_management + port: "24008/tcp" + - service: glusterfs_bricks + port: "49152-49251/tcp" + roles: + - role: os_firewall + when: + - openshift_storage_glusterfs_registry_is_native | default(True) | bool - name: Configure GlusterFS hosts: oo_first_master diff --git a/playbooks/common/openshift-loadbalancer/config.yml b/playbooks/common/openshift-loadbalancer/config.yml index c414913bf..2dacc1218 100644 --- a/playbooks/common/openshift-loadbalancer/config.yml +++ b/playbooks/common/openshift-loadbalancer/config.yml @@ -12,5 +12,6 @@ openshift_use_nuage | default(false), nuage_mon_rest_server_port | default(none))) + openshift_loadbalancer_additional_backends | default([]) }}" + openshift_image_tag: "{{ hostvars[groups.oo_first_master.0].openshift_image_tag }}" roles: - role: openshift_loadbalancer diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index 7d3a371e3..5de03951c 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -127,6 +127,9 @@ | union(groups['oo_etcd_to_config'] | default([]))) | oo_collect('openshift.common.hostname') | default([]) | join (',') }}" + openshift_no_proxy_etcd_host_ips: "{{ hostvars | oo_select_keys(groups['oo_etcd_to_config'] | default([])) + | oo_collect('openshift.common.ip') | default([]) | join(',') + }}" roles: - role: openshift_master openshift_ca_host: "{{ groups.oo_first_master.0 }}" diff --git a/playbooks/libvirt/openshift-cluster/tasks/launch_instances.yml b/playbooks/libvirt/openshift-cluster/tasks/launch_instances.yml index ccd29be29..4df86effa 100644 --- a/playbooks/libvirt/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/libvirt/openshift-cluster/tasks/launch_instances.yml @@ -49,11 +49,15 @@ - '{{ instances }}' - [ user-data, meta-data ] +- name: Check for genisoimage + command: which genisoimage + register: which_genisoimage + - name: Create the cloud-init config drive - command: 'genisoimage -output {{ libvirt_storage_pool_path }}/{{ item }}_cloud-init.iso -volid cidata -joliet -rock user-data meta-data' + command: "{{ 'genisoimage' if which_genisoimage.rc == 0 else 'mkisofs' }} -output {{ libvirt_storage_pool_path }}/{{ item }}_cloud-init.iso -volid cidata -joliet -rock user-data meta-data" args: - chdir: '{{ libvirt_storage_pool_path }}/{{ item }}_configdrive/' - creates: '{{ libvirt_storage_pool_path }}/{{ item }}_cloud-init.iso' + chdir: "{{ libvirt_storage_pool_path }}/{{ item }}_configdrive/" + creates: "{{ libvirt_storage_pool_path }}/{{ item }}_cloud-init.iso" with_items: '{{ instances }}' - name: Refresh the libvirt storage pool for openshift diff --git a/roles/ansible_service_broker/vars/openshift-enterprise.yml b/roles/ansible_service_broker/vars/openshift-enterprise.yml index f672760aa..0b3a2a69d 100644 --- a/roles/ansible_service_broker/vars/openshift-enterprise.yml +++ b/roles/ansible_service_broker/vars/openshift-enterprise.yml @@ -1,6 +1,6 @@ --- -__ansible_service_broker_image_prefix: registry.access.redhat.com/openshift3/ +__ansible_service_broker_image_prefix: registry.access.redhat.com/openshift3/ose- __ansible_service_broker_image_tag: latest __ansible_service_broker_etcd_image_prefix: rhel7/ diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index f0661209f..8c2f392ee 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -14,7 +14,8 @@ name: etcd_common vars: r_etcd_common_action: drop_etcdctl - when: openshift_etcd_etcdctl_profile | default(true) | bool + when: + - openshift_etcd_etcdctl_profile | default(true) | bool - block: - name: Pull etcd container diff --git a/roles/openshift_default_storage_class/defaults/main.yml b/roles/openshift_default_storage_class/defaults/main.yml index 8d07dbecc..bdece7640 100644 --- a/roles/openshift_default_storage_class/defaults/main.yml +++ b/roles/openshift_default_storage_class/defaults/main.yml @@ -1,7 +1,7 @@ --- openshift_storageclass_defaults: aws: - provisioner: kubernetes.io/aws-ebs + provisioner: aws-ebs name: gp2 parameters: type: gp2 @@ -9,7 +9,7 @@ openshift_storageclass_defaults: encrypted: 'false' gce: name: standard - provisioner: kubernetes.io/gce-pd + provisioner: gce-pd parameters: type: pd-standard diff --git a/roles/openshift_default_storage_class/tasks/main.yml b/roles/openshift_default_storage_class/tasks/main.yml index 8e14e67dc..172e2ac25 100644 --- a/roles/openshift_default_storage_class/tasks/main.yml +++ b/roles/openshift_default_storage_class/tasks/main.yml @@ -5,4 +5,5 @@ name: "{{ openshift_storageclass_name }}" default_storage_class: "{{ openshift_storageclass_default | default('true') | string}}" parameters: "{{ openshift_storageclass_parameters }}" + provisioner: "{{ openshift_storageclass_provisioner }}" run_once: true diff --git a/roles/openshift_excluder/tasks/exclude.yml b/roles/openshift_excluder/tasks/exclude.yml index 934f1b2d2..1b4818df9 100644 --- a/roles/openshift_excluder/tasks/exclude.yml +++ b/roles/openshift_excluder/tasks/exclude.yml @@ -5,7 +5,7 @@ register: docker_excluder_stat - name: Enable docker excluder - command: "{{ r_openshift_excluder_service_type }}-docker-excluder exclude" + command: "/sbin/{{ r_openshift_excluder_service_type }}-docker-excluder exclude" when: - r_openshift_excluder_enable_docker_excluder | bool - docker_excluder_stat.stat.exists @@ -16,7 +16,7 @@ register: openshift_excluder_stat - name: Enable openshift excluder - command: "{{ r_openshift_excluder_service_type }}-excluder exclude" + command: "/sbin/{{ r_openshift_excluder_service_type }}-excluder exclude" when: - r_openshift_excluder_enable_openshift_excluder | bool - openshift_excluder_stat.stat.exists diff --git a/roles/openshift_excluder/tasks/unexclude.yml b/roles/openshift_excluder/tasks/unexclude.yml index a5ce8d5c7..a68165bde 100644 --- a/roles/openshift_excluder/tasks/unexclude.yml +++ b/roles/openshift_excluder/tasks/unexclude.yml @@ -9,7 +9,7 @@ register: docker_excluder_stat - name: disable docker excluder - command: "{{ r_openshift_excluder_service_type }}-docker-excluder unexclude" + command: "/sbin/{{ r_openshift_excluder_service_type }}-docker-excluder unexclude" when: - unexclude_docker_excluder | default(false) | bool - docker_excluder_stat.stat.exists @@ -20,7 +20,7 @@ register: openshift_excluder_stat - name: disable openshift excluder - command: "{{ r_openshift_excluder_service_type }}-excluder unexclude" + command: "/sbin/{{ r_openshift_excluder_service_type }}-excluder unexclude" when: - unexclude_openshift_excluder | default(false) | bool - openshift_excluder_stat.stat.exists diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 4712ca3a8..49cc51b48 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -1647,6 +1647,13 @@ def set_proxy_facts(facts): common['no_proxy'] = common['no_proxy'].split(",") elif 'no_proxy' not in common: common['no_proxy'] = [] + + # See https://bugzilla.redhat.com/show_bug.cgi?id=1466783 + # masters behind a proxy need to connect to etcd via IP + if 'no_proxy_etcd_host_ips' in common: + if isinstance(common['no_proxy_etcd_host_ips'], string_types): + common['no_proxy'].extend(common['no_proxy_etcd_host_ips'].split(',')) + if 'generate_no_proxy_hosts' in common and safe_get_bool(common['generate_no_proxy_hosts']): if 'no_proxy_internal_hostnames' in common: common['no_proxy'].extend(common['no_proxy_internal_hostnames'].split(',')) diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 0390dc82e..581dd7d15 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -37,7 +37,7 @@ class ActionModule(ActionBase): return result try: - known_checks = self.load_known_checks() + known_checks = self.load_known_checks(tmp, task_vars) args = self._task.args resolved_checks = resolve_checks(args.get("checks", []), known_checks.values()) except OpenShiftCheckException as e: @@ -56,13 +56,13 @@ class ActionModule(ActionBase): display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"])) check = known_checks[check_name] - if not check.is_active(task_vars): + if not check.is_active(): r = dict(skipped=True, skipped_reason="Not active for this host") elif check_name in user_disabled_checks: r = dict(skipped=True, skipped_reason="Disabled by user request") else: try: - r = check.run(tmp, task_vars) + r = check.run() except OpenShiftCheckException as e: r = dict( failed=True, @@ -78,7 +78,7 @@ class ActionModule(ActionBase): result["changed"] = any(r.get("changed", False) for r in check_results.values()) return result - def load_known_checks(self): + def load_known_checks(self, tmp, task_vars): load_checks() known_checks = {} @@ -91,7 +91,7 @@ class ActionModule(ActionBase): check_name, cls.__module__, cls.__name__, other_cls.__module__, other_cls.__name__)) - known_checks[check_name] = cls(execute_module=self._execute_module) + known_checks[check_name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars) return known_checks diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 443b76ea1..d10200719 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -1,6 +1,6 @@ -''' -Ansible callback plugin. -''' +""" +Ansible callback plugin to give a nicely formatted summary of failures. +""" # Reason: In several locations below we disable pylint protected-access # for Ansible objects that do not give us any public way @@ -16,11 +16,11 @@ from ansible.utils.color import stringc class CallbackModule(CallbackBase): - ''' + """ This callback plugin stores task results and summarizes failures. The file name is prefixed with `zz_` to make this plugin be loaded last by Ansible, thus making its output the last thing that users see. - ''' + """ CALLBACK_VERSION = 2.0 CALLBACK_TYPE = 'aggregate' @@ -48,7 +48,7 @@ class CallbackModule(CallbackBase): self._print_failure_details(self.__failures) def _print_failure_details(self, failures): - '''Print a summary of failed tasks or checks.''' + """Print a summary of failed tasks or checks.""" self._display.display(u'\nFailure summary:\n') width = len(str(len(failures))) @@ -69,7 +69,9 @@ class CallbackModule(CallbackBase): playbook_context = None # re: result attrs see top comment # pylint: disable=protected-access for failure in failures: - # get context from check task result since callback plugins cannot access task vars + # Get context from check task result since callback plugins cannot access task vars. + # NOTE: thus context is not known unless checks run. Failures prior to checks running + # don't have playbook_context in the results. But we only use it now when checks fail. playbook_context = playbook_context or failure['result']._result.get('playbook_context') failed_checks.update( name @@ -81,8 +83,11 @@ class CallbackModule(CallbackBase): def _print_check_failure_summary(self, failed_checks, context): checks = ','.join(sorted(failed_checks)) - # NOTE: context is not set if all failures occurred prior to checks task - summary = ( + # The purpose of specifying context is to vary the output depending on what the user was + # expecting to happen (based on which playbook they ran). The only use currently is to + # vary the message depending on whether the user was deliberately running checks or was + # trying to install/upgrade and checks are just included. Other use cases may arise. + summary = ( # default to explaining what checks are in the first place '\n' 'The execution of "{playbook}"\n' 'includes checks designed to fail early if the requirements\n' @@ -94,27 +99,26 @@ class CallbackModule(CallbackBase): 'Some checks may be configurable by variables if your requirements\n' 'are different from the defaults; consult check documentation.\n' 'Variables can be set in the inventory or passed on the\n' - 'command line using the -e flag to ansible-playbook.\n' + 'command line using the -e flag to ansible-playbook.\n\n' ).format(playbook=self._playbook_file, checks=checks) if context in ['pre-install', 'health']: - summary = ( + summary = ( # user was expecting to run checks, less explanation needed '\n' 'You may choose to configure or disable failing checks by\n' 'setting Ansible variables. To disable those above:\n\n' ' openshift_disable_check={checks}\n\n' 'Consult check documentation for configurable variables.\n' 'Variables can be set in the inventory or passed on the\n' - 'command line using the -e flag to ansible-playbook.\n' + 'command line using the -e flag to ansible-playbook.\n\n' ).format(checks=checks) - # other expected contexts: install, upgrade self._display.display(summary) # re: result attrs see top comment # pylint: disable=protected-access def _format_failure(failure): - '''Return a list of pretty-formatted text entries describing a failure, including + """Return a list of pretty-formatted text entries describing a failure, including relevant information about it. Expect that the list of text entries will be joined - by a newline separator when output to the user.''' + by a newline separator when output to the user.""" result = failure['result'] host = result._host.get_name() play = _get_play(result._task) @@ -135,7 +139,7 @@ def _format_failure(failure): def _format_failed_checks(checks): - '''Return pretty-formatted text describing checks that failed.''' + """Return pretty-formatted text describing checks that failed.""" failed_check_msgs = [] for check, body in checks.items(): if body.get('failed', False): # only show the failed checks @@ -150,7 +154,7 @@ def _format_failed_checks(checks): # This is inspired by ansible.playbook.base.Base.dump_me. # re: play/task/block attrs see top comment # pylint: disable=protected-access def _get_play(obj): - '''Given a task or block, recursively tries to find its parent play.''' + """Given a task or block, recursively try to find its parent play.""" if hasattr(obj, '_play'): return obj._play if getattr(obj, '_parent'): diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index 4f43ee751..f9babebb9 100755..100644 --- a/roles/openshift_health_checker/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -1,5 +1,5 @@ #!/usr/bin/python -''' +""" Ansible module for yum-based systems determining if multiple releases of an OpenShift package are available, and if the release requested (if any) is available down to the given precision. @@ -16,7 +16,7 @@ of release availability already. Without duplicating all that, we would like the user to have a helpful error message if we detect things will not work out right. Note that if openshift_release is not specified in the inventory, the version comparison checks just pass. -''' +""" from ansible.module_utils.basic import AnsibleModule # NOTE: because of the dependency on yum (Python 2-only), this module does not @@ -32,7 +32,7 @@ except ImportError as err: class AosVersionException(Exception): - '''Base exception class for package version problems''' + """Base exception class for package version problems""" def __init__(self, message, problem_pkgs=None): Exception.__init__(self, message) self.problem_pkgs = problem_pkgs diff --git a/roles/openshift_health_checker/library/check_yum_update.py b/roles/openshift_health_checker/library/check_yum_update.py index 433795b67..433795b67 100755..100644 --- a/roles/openshift_health_checker/library/check_yum_update.py +++ b/roles/openshift_health_checker/library/check_yum_update.py diff --git a/roles/openshift_health_checker/library/docker_info.py b/roles/openshift_health_checker/library/docker_info.py index 7f712bcff..0d0ddae8b 100644 --- a/roles/openshift_health_checker/library/docker_info.py +++ b/roles/openshift_health_checker/library/docker_info.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-docstring """ Ansible module for determining information about the docker host. @@ -13,6 +12,7 @@ from ansible.module_utils.docker_common import AnsibleDockerClient def main(): + """Entrypoint for running an Ansible module.""" client = AnsibleDockerClient() client.module.exit_json( diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 5c9949ced..40a28cde5 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -19,15 +19,21 @@ class OpenShiftCheckException(Exception): @six.add_metaclass(ABCMeta) class OpenShiftCheck(object): - """A base class for defining checks for an OpenShift cluster environment.""" + """ + A base class for defining checks for an OpenShift cluster environment. + + Expect optional params: method execute_module, dict task_vars, and string tmp. + execute_module is expected to have a signature compatible with _execute_module + from ansible plugins/action/__init__.py, e.g.: + def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args): + This is stored so that it can be invoked in subclasses via check.execute_module("name", args) + which provides the check's stored task_vars and tmp. + """ - def __init__(self, execute_module=None, module_executor=None): - if execute_module is module_executor is None: - raise TypeError( - "__init__() takes either execute_module (recommended) " - "or module_executor (deprecated), none given") - self.execute_module = execute_module or module_executor - self.module_executor = self.execute_module + def __init__(self, execute_module=None, task_vars=None, tmp=None): + self._execute_module = execute_module + self.task_vars = task_vars or {} + self.tmp = tmp @abstractproperty def name(self): @@ -43,13 +49,13 @@ class OpenShiftCheck(object): """ return [] - @classmethod - def is_active(cls, task_vars): # pylint: disable=unused-argument + @staticmethod + def is_active(): """Returns true if this check applies to the ansible-playbook run.""" return True @abstractmethod - def run(self, tmp, task_vars): + def run(self): """Executes a check, normally implemented as a module.""" return {} @@ -62,6 +68,43 @@ class OpenShiftCheck(object): for subclass in subclass.subclasses(): yield subclass + def execute_module(self, module_name=None, module_args=None): + """Invoke an Ansible module from a check. + + Invoke stored _execute_module, normally copied from the action + plugin, with its params and the task_vars and tmp given at + check initialization. No positional parameters beyond these + are specified. If it's necessary to specify any of the other + parameters to _execute_module then that should just be invoked + directly (with awareness of changes in method signature per + Ansible version). + + So e.g. check.execute_module("foo", dict(arg1=...)) + Return: result hash from module execution. + """ + if self._execute_module is None: + raise NotImplementedError( + self.__class__.__name__ + + " invoked execute_module without providing the method at initialization." + ) + return self._execute_module(module_name, module_args, self.tmp, self.task_vars) + + def get_var(self, *keys, **kwargs): + """Get deeply nested values from task_vars. + + Ansible task_vars structures are Python dicts, often mapping strings to + other dicts. This helper makes it easier to get a nested value, raising + OpenShiftCheckException when a key is not found or returning a default value + provided as a keyword argument. + """ + try: + value = reduce(operator.getitem, keys, self.task_vars) + except (KeyError, TypeError): + if "default" in kwargs: + return kwargs["default"] + raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys)))) + return value + LOADER_EXCLUDES = ( "__init__.py", @@ -86,20 +129,3 @@ def load_checks(path=None, subpkg=""): modules.append(import_module(__package__ + subpkg + "." + name[:-3])) return modules - - -def get_var(task_vars, *keys, **kwargs): - """Helper function to get deeply nested values from task_vars. - - Ansible task_vars structures are Python dicts, often mapping strings to - other dicts. This helper makes it easier to get a nested value, raising - OpenShiftCheckException when a key is not found or returning a default value - provided as a keyword argument. - """ - try: - value = reduce(operator.getitem, keys, task_vars) - except (KeyError, TypeError): - if "default" in kwargs: - return kwargs["default"] - raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys)))) - return value diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index e93e81efa..283461294 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -3,7 +3,7 @@ import os.path import tempfile -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException class DiskAvailability(OpenShiftCheck): @@ -35,22 +35,21 @@ class DiskAvailability(OpenShiftCheck): }, } - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts that do not have recommended disk space requirements.""" - group_names = get_var(task_vars, "group_names", default=[]) + group_names = self.get_var("group_names", default=[]) active_groups = set() - for recommendation in cls.recommended_disk_space_bytes.values(): + for recommendation in self.recommended_disk_space_bytes.values(): active_groups.update(recommendation.keys()) has_disk_space_recommendation = bool(active_groups.intersection(group_names)) - return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation + return super(DiskAvailability, self).is_active() and has_disk_space_recommendation - def run(self, tmp, task_vars): - group_names = get_var(task_vars, "group_names") - ansible_mounts = get_var(task_vars, "ansible_mounts") + def run(self): + group_names = self.get_var("group_names") + ansible_mounts = self.get_var("ansible_mounts") ansible_mounts = {mount['mount']: mount for mount in ansible_mounts} - user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={}) + user_config = self.get_var("openshift_check_min_host_disk_gb", default={}) try: # For backwards-compatibility, if openshift_check_min_host_disk_gb # is a number, then it overrides the required config for '/var'. diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index bde81ad2c..77180223e 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,6 +1,6 @@ """Check that required Docker images are available.""" -from openshift_checks import OpenShiftCheck, get_var +from openshift_checks import OpenShiftCheck from openshift_checks.mixins import DockerHostMixin @@ -22,25 +22,26 @@ DEPLOYMENT_IMAGE_INFO = { class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. - This check attempts to ensure that required docker images are - either present locally, or able to be pulled down from available - registries defined in a host machine. + Determine docker images that an install would require and check that they + are either present in the host's docker index, or available for the host to pull + with known registries as defined in our inventory file (or defaults). """ name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] + # we use python-docker-py to check local docker for images, and skopeo + # to look for images available remotely without waiting to pull them. + dependencies = ["python-docker-py", "skopeo"] - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts with unsupported deployment types.""" - deployment_type = get_var(task_vars, "openshift_deployment_type") + deployment_type = self.get_var("openshift_deployment_type") has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO - return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type + return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type - def run(self, tmp, task_vars): - msg, failed, changed = self.ensure_dependencies(task_vars) + def run(self): + msg, failed, changed = self.ensure_dependencies() if failed: return { "failed": True, @@ -48,18 +49,18 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg } - required_images = self.required_images(task_vars) - missing_images = set(required_images) - set(self.local_images(required_images, task_vars)) + required_images = self.required_images() + missing_images = set(required_images) - set(self.local_images(required_images)) # exit early if all images were found locally if not missing_images: return {"changed": changed} - registries = self.known_docker_registries(task_vars) + registries = self.known_docker_registries() if not registries: return {"failed": True, "msg": "Unable to retrieve any docker registries.", "changed": changed} - available_images = self.available_images(missing_images, registries, task_vars) + available_images = self.available_images(missing_images, registries) unavailable_images = set(missing_images) - set(available_images) if unavailable_images: @@ -74,8 +75,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - @staticmethod - def required_images(task_vars): + def required_images(self): """ Determine which images we expect to need for this host. Returns: a set of required images like 'openshift/origin:v3.6' @@ -92,17 +92,17 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): Registry is not included in constructed images. It may be in oreg_url or etcd image. """ required = set() - deployment_type = get_var(task_vars, "openshift_deployment_type") - host_groups = get_var(task_vars, "group_names") + deployment_type = self.get_var("openshift_deployment_type") + host_groups = self.get_var("group_names") # containerized etcd may not have openshift_image_tag, see bz 1466622 - image_tag = get_var(task_vars, "openshift_image_tag", default="latest") + image_tag = self.get_var("openshift_image_tag", default="latest") image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] if not image_info: return required # template for images that run on top of OpenShift image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") - image_url = get_var(task_vars, "oreg_url", default="") or image_url + image_url = self.get_var("oreg_url", default="") or image_url if 'nodes' in host_groups: for suffix in NODE_IMAGE_SUFFIXES: required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) @@ -112,7 +112,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): required.add(image_info["registry_console_image"]) # images for containerized components - if get_var(task_vars, "openshift", "common", "is_containerized"): + if self.get_var("openshift", "common", "is_containerized"): components = set() if 'nodes' in host_groups: components.update(["node", "openvswitch"]) @@ -125,28 +125,27 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return required - def local_images(self, images, task_vars): + def local_images(self, images): """Filter a list of images and return those available locally.""" return [ image for image in images - if self.is_image_local(image, task_vars) + if self.is_image_local(image) ] - def is_image_local(self, image, task_vars): + def is_image_local(self, image): """Check if image is already in local docker index.""" - result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) + result = self.execute_module("docker_image_facts", {"name": image}) if result.get("failed", False): return False return bool(result.get("images", [])) - @staticmethod - def known_docker_registries(task_vars): + def known_docker_registries(self): """Build a list of docker registries available according to inventory vars.""" - docker_facts = get_var(task_vars, "openshift", "docker") + docker_facts = self.get_var("openshift", "docker") regs = set(docker_facts["additional_registries"]) - deployment_type = get_var(task_vars, "openshift_deployment_type") + deployment_type = self.get_var("openshift_deployment_type") if deployment_type == "origin": regs.update(["docker.io"]) elif "enterprise" in deployment_type: @@ -154,24 +153,25 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return list(regs) - def available_images(self, images, registries, task_vars): - """Inspect existing images using Skopeo and return all images successfully inspected.""" + def available_images(self, images, default_registries): + """Search remotely for images. Returns: list of images found.""" return [ image for image in images - if self.is_available_skopeo_image(image, registries, task_vars) + if self.is_available_skopeo_image(image, default_registries) ] - def is_available_skopeo_image(self, image, registries, task_vars): + def is_available_skopeo_image(self, image, default_registries): """Use Skopeo to determine if required image exists in known registry(s).""" + registries = default_registries - # if image does already includes a registry, just use that + # if image already includes a registry, only use that if image.count("/") > 1: registry, image = image.split("/", 1) registries = [registry] for registry in registries: args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)} - result = self.execute_module("command", args, task_vars=task_vars) + result = self.execute_module("command", args) if result.get("rc", 0) == 0 and not result.get("failed"): return True diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index d2227d244..dea15a56e 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -2,7 +2,7 @@ import json import os.path import re -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException from openshift_checks.mixins import DockerHostMixin @@ -42,8 +42,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): ), ] - def run(self, tmp, task_vars): - msg, failed, changed = self.ensure_dependencies(task_vars) + def run(self): + msg, failed, changed = self.ensure_dependencies() if failed: return { "failed": True, @@ -52,7 +52,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - docker_info = self.execute_module("docker_info", {}, task_vars=task_vars) + docker_info = self.execute_module("docker_info", {}) if docker_info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -76,15 +76,15 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): result = {} if driver == "devicemapper": - result = self.check_devicemapper_support(driver_status, task_vars) + result = self.check_devicemapper_support(driver_status) if driver in ['overlay', 'overlay2']: - result = self.check_overlay_support(docker_info, driver_status, task_vars) + result = self.check_overlay_support(docker_info, driver_status) result['changed'] = result.get('changed', False) or changed return result - def check_devicemapper_support(self, driver_status, task_vars): + def check_devicemapper_support(self, driver_status): """Check if dm storage driver is supported as configured. Return: result dict.""" if driver_status.get("Data loop file"): msg = ( @@ -94,10 +94,10 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "See http://red.ht/2rNperO for further information." ) return {"failed": True, "msg": msg} - result = self.check_dm_usage(driver_status, task_vars) + result = self.check_dm_usage(driver_status) return result - def check_dm_usage(self, driver_status, task_vars): + def check_dm_usage(self, driver_status): """Check usage thresholds for Docker dm storage driver. Return: result dict. Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures @@ -109,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): could run out of space first; so we check both. """ vals = dict( - vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars), + vg_free=self.get_vg_free(driver_status.get("Pool Name")), data_used=driver_status.get("Data Space Used"), data_total=driver_status.get("Data Space Total"), metadata_used=driver_status.get("Metadata Space Used"), @@ -130,7 +130,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): # determine the threshold percentages which usage should not exceed for name, default in [("data", self.max_thinpool_data_usage_percent), ("metadata", self.max_thinpool_meta_usage_percent)]: - percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default) + percent = self.get_var("max_thinpool_" + name + "_usage_percent", default=default) try: vals[name + "_threshold"] = float(percent) except ValueError: @@ -157,7 +157,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."]) return vals - def get_vg_free(self, pool, task_vars): + def get_vg_free(self, pool): """Determine which VG to examine according to the pool name. Return: size vgs reports. Pool name is the only indicator currently available from the Docker API driver info. We assume a name that looks like "vg--name-docker--pool"; @@ -174,7 +174,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " @@ -213,7 +213,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): return float(number) * multiplier - def check_overlay_support(self, docker_info, driver_status, task_vars): + def check_overlay_support(self, docker_info, driver_status): """Check if overlay storage driver is supported for this host. Return: result dict.""" # check for xfs as backing store backing_fs = driver_status.get("Backing Filesystem", "[NONE]") @@ -239,13 +239,13 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): # NOTE: we could check for --selinux-enabled here but docker won't even start with # that option until it's supported in the kernel so we don't need to. - return self.check_overlay_usage(docker_info, task_vars) + return self.check_overlay_usage(docker_info) - def check_overlay_usage(self, docker_info, task_vars): + def check_overlay_usage(self, docker_info): """Check disk usage on OverlayFS backing store volume. Return: result dict.""" path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"] - threshold = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent) + threshold = self.get_var("max_overlay_usage_percent", default=self.max_overlay_usage_percent) try: threshold = float(threshold) except ValueError: @@ -254,7 +254,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold), } - mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts")) + mount = self.find_ansible_mount(path, self.get_var("ansible_mounts")) try: free_bytes = mount['size_available'] total_bytes = mount['size_total'] diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py index c04a69765..28c38504d 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py @@ -2,7 +2,7 @@ Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster. """ -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException class EtcdImageDataSize(OpenShiftCheck): @@ -11,24 +11,25 @@ class EtcdImageDataSize(OpenShiftCheck): name = "etcd_imagedata_size" tags = ["etcd"] - def run(self, tmp, task_vars): - etcd_mountpath = self._get_etcd_mountpath(get_var(task_vars, "ansible_mounts")) + def run(self): + etcd_mountpath = self._get_etcd_mountpath(self.get_var("ansible_mounts")) etcd_avail_diskspace = etcd_mountpath["size_available"] etcd_total_diskspace = etcd_mountpath["size_total"] - etcd_imagedata_size_limit = get_var(task_vars, - "etcd_max_image_data_size_bytes", - default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace))) + etcd_imagedata_size_limit = self.get_var( + "etcd_max_image_data_size_bytes", + default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace)) + ) - etcd_is_ssl = get_var(task_vars, "openshift", "master", "etcd_use_ssl", default=False) - etcd_port = get_var(task_vars, "openshift", "master", "etcd_port", default=2379) - etcd_hosts = get_var(task_vars, "openshift", "master", "etcd_hosts") + etcd_is_ssl = self.get_var("openshift", "master", "etcd_use_ssl", default=False) + etcd_port = self.get_var("openshift", "master", "etcd_port", default=2379) + etcd_hosts = self.get_var("openshift", "master", "etcd_hosts") - config_base = get_var(task_vars, "openshift", "common", "config_base") + config_base = self.get_var("openshift", "common", "config_base") - cert = task_vars.get("etcd_client_cert", config_base + "/master/master.etcd-client.crt") - key = task_vars.get("etcd_client_key", config_base + "/master/master.etcd-client.key") - ca_cert = task_vars.get("etcd_client_ca_cert", config_base + "/master/master.etcd-ca.crt") + cert = self.get_var("etcd_client_cert", default=config_base + "/master/master.etcd-client.crt") + key = self.get_var("etcd_client_key", default=config_base + "/master/master.etcd-client.key") + ca_cert = self.get_var("etcd_client_ca_cert", default=config_base + "/master/master.etcd-ca.crt") for etcd_host in list(etcd_hosts): args = { @@ -46,7 +47,7 @@ class EtcdImageDataSize(OpenShiftCheck): }, } - etcdkeysize = self.module_executor("etcdkeysize", args, task_vars) + etcdkeysize = self.execute_module("etcdkeysize", args) if etcdkeysize.get("rc", 0) != 0 or etcdkeysize.get("failed"): msg = 'Failed to retrieve stats for etcd host "{host}": {reason}' diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py index 40c87873d..cc1b14d8a 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py @@ -1,6 +1,6 @@ """Check that scans journalctl for messages caused as a symptom of increased etcd traffic.""" -from openshift_checks import OpenShiftCheck, get_var +from openshift_checks import OpenShiftCheck class EtcdTraffic(OpenShiftCheck): @@ -9,19 +9,18 @@ class EtcdTraffic(OpenShiftCheck): name = "etcd_traffic" tags = ["health", "etcd"] - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts that do not have etcd in their group names.""" - group_names = get_var(task_vars, "group_names", default=[]) + group_names = self.get_var("group_names", default=[]) valid_group_names = "etcd" in group_names - version = get_var(task_vars, "openshift", "common", "short_version") + version = self.get_var("openshift", "common", "short_version") valid_version = version in ("3.4", "3.5", "1.4", "1.5") - return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version + return super(EtcdTraffic, self).is_active() and valid_group_names and valid_version - def run(self, tmp, task_vars): - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") + def run(self): + is_containerized = self.get_var("openshift", "common", "is_containerized") unit = "etcd_container" if is_containerized else "etcd" log_matchers = [{ @@ -30,9 +29,7 @@ class EtcdTraffic(OpenShiftCheck): "unit": unit }] - match = self.execute_module("search_journalctl", { - "log_matchers": log_matchers, - }, task_vars) + match = self.execute_module("search_journalctl", {"log_matchers": log_matchers}) if match.get("matched"): msg = ("Higher than normal etcd traffic detected.\n" diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index 7452c9cc1..da7d0364a 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -1,6 +1,6 @@ """A health check for OpenShift clusters.""" -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException class EtcdVolume(OpenShiftCheck): @@ -14,21 +14,18 @@ class EtcdVolume(OpenShiftCheck): # Where to find ectd data, higher priority first. supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] - @classmethod - def is_active(cls, task_vars): - etcd_hosts = get_var(task_vars, "groups", "etcd", default=[]) or get_var(task_vars, "groups", "masters", - default=[]) or [] - is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts - return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host + def is_active(self): + etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] + is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts + return super(EtcdVolume, self).is_active() and is_etcd_host - def run(self, tmp, task_vars): - mount_info = self._etcd_mount_info(task_vars) + def run(self): + mount_info = self._etcd_mount_info() available = mount_info["size_available"] total = mount_info["size_total"] used = total - available - threshold = get_var( - task_vars, + threshold = self.get_var( "etcd_device_usage_threshold_percent", default=self.default_threshold_percent ) @@ -45,8 +42,8 @@ class EtcdVolume(OpenShiftCheck): return {"changed": False} - def _etcd_mount_info(self, task_vars): - ansible_mounts = get_var(task_vars, "ansible_mounts") + def _etcd_mount_info(self): + ansible_mounts = self.get_var("ansible_mounts") mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts} for path in self.supported_mount_paths: diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py index c9fc59896..f82ae64d7 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/curator.py +++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py @@ -1,28 +1,21 @@ -""" -Module for performing checks on an Curator logging deployment -""" +"""Check for an aggregated logging Curator deployment""" -from openshift_checks import get_var from openshift_checks.logging.logging import LoggingCheck class Curator(LoggingCheck): - """Module that checks an integrated logging Curator deployment""" + """Check for an aggregated logging Curator deployment""" name = "curator" tags = ["health", "logging"] logging_namespace = None - def run(self, tmp, task_vars): - """Check various things and gather errors. Returns: result as hash""" - - self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") + def run(self): + self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") curator_pods, error = super(Curator, self).get_pods_for_component( - self.module_executor, self.logging_namespace, "curator", - task_vars ) if error: return {"failed": True, "changed": False, "msg": error} diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 01cb35b81..1e478c04d 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -1,35 +1,30 @@ -""" -Module for performing checks on an Elasticsearch logging deployment -""" +"""Check for an aggregated logging Elasticsearch deployment""" import json import re -from openshift_checks import get_var from openshift_checks.logging.logging import LoggingCheck class Elasticsearch(LoggingCheck): - """Module that checks an integrated logging Elasticsearch deployment""" + """Check for an aggregated logging Elasticsearch deployment""" name = "elasticsearch" tags = ["health", "logging"] logging_namespace = None - def run(self, tmp, task_vars): + def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") + self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") es_pods, error = super(Elasticsearch, self).get_pods_for_component( - self.execute_module, self.logging_namespace, "es", - task_vars, ) if error: return {"failed": True, "changed": False, "msg": error} - check_error = self.check_elasticsearch(es_pods, task_vars) + check_error = self.check_elasticsearch(es_pods) if check_error: msg = ("The following Elasticsearch deployment issue was found:" @@ -41,7 +36,7 @@ class Elasticsearch(LoggingCheck): return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'} def _not_running_elasticsearch_pods(self, es_pods): - """Returns: list of running pods, list of errors about non-running pods""" + """Returns: list of pods that are not running, list of errors about non-running pods""" not_running = super(Elasticsearch, self).not_running_pods(es_pods) if not_running: return not_running, [( @@ -54,7 +49,7 @@ class Elasticsearch(LoggingCheck): ))] return not_running, [] - def check_elasticsearch(self, es_pods, task_vars): + def check_elasticsearch(self, es_pods): """Various checks for elasticsearch. Returns: error string""" not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods) running_pods = [pod for pod in es_pods if pod not in not_running_pods] @@ -65,10 +60,10 @@ class Elasticsearch(LoggingCheck): } if not pods_by_name: return 'No logging Elasticsearch pods were found. Is logging deployed?' - error_msgs += self._check_elasticsearch_masters(pods_by_name, task_vars) - error_msgs += self._check_elasticsearch_node_list(pods_by_name, task_vars) - error_msgs += self._check_es_cluster_health(pods_by_name, task_vars) - error_msgs += self._check_elasticsearch_diskspace(pods_by_name, task_vars) + error_msgs += self._check_elasticsearch_masters(pods_by_name) + error_msgs += self._check_elasticsearch_node_list(pods_by_name) + error_msgs += self._check_es_cluster_health(pods_by_name) + error_msgs += self._check_elasticsearch_diskspace(pods_by_name) return '\n'.join(error_msgs) @staticmethod @@ -76,14 +71,14 @@ class Elasticsearch(LoggingCheck): base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'" return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url) - def _check_elasticsearch_masters(self, pods_by_name, task_vars): + def _check_elasticsearch_masters(self, pods_by_name): """Check that Elasticsearch masters are sane. Returns: list of error strings""" es_master_names = set() error_msgs = [] for pod_name in pods_by_name.keys(): # Compare what each ES node reports as master and compare for split brain get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master") - master_name_str = self._exec_oc(get_master_cmd, [], task_vars) + master_name_str = self._exec_oc(get_master_cmd, []) master_names = (master_name_str or '').split(' ') if len(master_names) > 1: es_master_names.add(master_names[1]) @@ -108,7 +103,7 @@ class Elasticsearch(LoggingCheck): return error_msgs - def _check_elasticsearch_node_list(self, pods_by_name, task_vars): + def _check_elasticsearch_node_list(self, pods_by_name): """Check that reported ES masters are accounted for by pods. Returns: list of error strings""" if not pods_by_name: @@ -116,7 +111,7 @@ class Elasticsearch(LoggingCheck): # get ES cluster nodes node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes') - cluster_node_data = self._exec_oc(node_cmd, [], task_vars) + cluster_node_data = self._exec_oc(node_cmd, []) try: cluster_nodes = json.loads(cluster_node_data)['nodes'] except (ValueError, KeyError): @@ -138,12 +133,12 @@ class Elasticsearch(LoggingCheck): return error_msgs - def _check_es_cluster_health(self, pods_by_name, task_vars): + def _check_es_cluster_health(self, pods_by_name): """Exec into the elasticsearch pods and check the cluster health. Returns: list of errors""" error_msgs = [] for pod_name in pods_by_name.keys(): cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true') - cluster_health_data = self._exec_oc(cluster_health_cmd, [], task_vars) + cluster_health_data = self._exec_oc(cluster_health_cmd, []) try: health_res = json.loads(cluster_health_data) if not health_res or not health_res.get('status'): @@ -162,7 +157,7 @@ class Elasticsearch(LoggingCheck): return error_msgs - def _check_elasticsearch_diskspace(self, pods_by_name, task_vars): + def _check_elasticsearch_diskspace(self, pods_by_name): """ Exec into an ES pod and query the diskspace on the persistent volume. Returns: list of errors @@ -170,7 +165,7 @@ class Elasticsearch(LoggingCheck): error_msgs = [] for pod_name in pods_by_name.keys(): df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) - disk_output = self._exec_oc(df_cmd, [], task_vars) + disk_output = self._exec_oc(df_cmd, []) lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$' @@ -182,7 +177,7 @@ class Elasticsearch(LoggingCheck): continue inode_pct, disk_pct = re.match(body_re, lines[1]).groups() - inode_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_inode_pct', default='90') + inode_pct_thresh = self.get_var('openshift_check_efk_es_inode_pct', default='90') if int(inode_pct) >= int(inode_pct_thresh): error_msgs.append( 'Inode percent usage on the storage volume for logging ES pod "{pod}"\n' @@ -193,7 +188,7 @@ class Elasticsearch(LoggingCheck): limit=str(inode_pct_thresh), param='openshift_check_efk_es_inode_pct', )) - disk_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_storage_pct', default='80') + disk_pct_thresh = self.get_var('openshift_check_efk_es_storage_pct', default='80') if int(disk_pct) >= int(disk_pct_thresh): error_msgs.append( 'Disk percent usage on the storage volume for logging ES pod "{pod}"\n' @@ -207,11 +202,9 @@ class Elasticsearch(LoggingCheck): return error_msgs - def _exec_oc(self, cmd_str, extra_args, task_vars): + def _exec_oc(self, cmd_str, extra_args): return super(Elasticsearch, self).exec_oc( - self.execute_module, self.logging_namespace, cmd_str, extra_args, - task_vars, ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py index 627567293..063e707a9 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py @@ -1,33 +1,29 @@ -""" -Module for performing checks on an Fluentd logging deployment -""" +"""Check for an aggregated logging Fluentd deployment""" import json -from openshift_checks import get_var from openshift_checks.logging.logging import LoggingCheck class Fluentd(LoggingCheck): - """Module that checks an integrated logging Fluentd deployment""" + """Check for an aggregated logging Fluentd deployment""" + name = "fluentd" tags = ["health", "logging"] logging_namespace = None - def run(self, tmp, task_vars): + def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") + self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") fluentd_pods, error = super(Fluentd, self).get_pods_for_component( - self.execute_module, self.logging_namespace, "fluentd", - task_vars, ) if error: return {"failed": True, "changed": False, "msg": error} - check_error = self.check_fluentd(fluentd_pods, task_vars) + check_error = self.check_fluentd(fluentd_pods) if check_error: msg = ("The following Fluentd deployment issue was found:" @@ -53,10 +49,9 @@ class Fluentd(LoggingCheck): ).format(label=node_selector) return fluentd_nodes, None - @staticmethod - def _check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars): + def _check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector): """Note if nodes are not labeled as expected. Returns: error string""" - intended_nodes = get_var(task_vars, 'openshift_logging_fluentd_hosts', default=['--all']) + intended_nodes = self.get_var('openshift_logging_fluentd_hosts', default=['--all']) if not intended_nodes or '--all' in intended_nodes: intended_nodes = nodes_by_name.keys() nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys()) @@ -114,13 +109,15 @@ class Fluentd(LoggingCheck): )) return None - def check_fluentd(self, pods, task_vars): + def check_fluentd(self, pods): """Verify fluentd is running everywhere. Returns: error string""" - node_selector = get_var(task_vars, 'openshift_logging_fluentd_nodeselector', - default='logging-infra-fluentd=true') + node_selector = self.get_var( + 'openshift_logging_fluentd_nodeselector', + default='logging-infra-fluentd=true' + ) - nodes_by_name, error = self.get_nodes_by_name(task_vars) + nodes_by_name, error = self.get_nodes_by_name() if error: return error @@ -129,7 +126,7 @@ class Fluentd(LoggingCheck): return error error_msgs = [] - error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars) + error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector) if error: error_msgs.append(error) error = self._check_nodes_have_fluentd(pods, fluentd_nodes) @@ -148,9 +145,9 @@ class Fluentd(LoggingCheck): return '\n'.join(error_msgs) - def get_nodes_by_name(self, task_vars): + def get_nodes_by_name(self): """Retrieve all the node definitions. Returns: dict(name: node), error string""" - nodes_json = self._exec_oc("get nodes -o json", [], task_vars) + nodes_json = self._exec_oc("get nodes -o json", []) try: nodes = json.loads(nodes_json) except ValueError: # no valid json - should not happen @@ -162,9 +159,9 @@ class Fluentd(LoggingCheck): for node in nodes['items'] }, None - def _exec_oc(self, cmd_str, extra_args, task_vars): - return super(Fluentd, self).exec_oc(self.execute_module, - self.logging_namespace, - cmd_str, - extra_args, - task_vars) + def _exec_oc(self, cmd_str, extra_args): + return super(Fluentd, self).exec_oc( + self.logging_namespace, + cmd_str, + extra_args, + ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index 551e8dfa0..60f94e106 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -12,7 +12,6 @@ except ImportError: from urllib.error import HTTPError, URLError import urllib.request as urllib2 -from openshift_checks import get_var from openshift_checks.logging.logging import LoggingCheck @@ -24,22 +23,20 @@ class Kibana(LoggingCheck): logging_namespace = None - def run(self, tmp, task_vars): + def run(self): """Check various things and gather errors. Returns: result as hash""" - self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") + self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") kibana_pods, error = super(Kibana, self).get_pods_for_component( - self.execute_module, self.logging_namespace, "kibana", - task_vars, ) if error: return {"failed": True, "changed": False, "msg": error} check_error = self.check_kibana(kibana_pods) if not check_error: - check_error = self._check_kibana_route(task_vars) + check_error = self._check_kibana_route() if check_error: msg = ("The following Kibana deployment issue was found:" @@ -50,7 +47,7 @@ class Kibana(LoggingCheck): # TODO(lmeyer): run it all again for the ops cluster return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'} - def _verify_url_internal(self, url, task_vars): + def _verify_url_internal(self, url): """ Try to reach a URL from the host. Returns: success (bool), reason (for failure) @@ -62,7 +59,7 @@ class Kibana(LoggingCheck): # TODO(lmeyer): give users option to validate certs status_code=302, ) - result = self.execute_module('uri', args, None, task_vars) + result = self.execute_module('uri', args) if result.get('failed'): return result['msg'] return None @@ -114,14 +111,14 @@ class Kibana(LoggingCheck): return None - def _get_kibana_url(self, task_vars): + def _get_kibana_url(self): """ Get kibana route or report error. Returns: url (or empty), reason for failure """ # Get logging url - get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars) + get_route = self._exec_oc("get route logging-kibana -o json", []) if not get_route: return None, 'no_route_exists' @@ -139,7 +136,7 @@ class Kibana(LoggingCheck): return 'https://{}/'.format(host), None - def _check_kibana_route(self, task_vars): + def _check_kibana_route(self): """ Check to see if kibana route is up and working. Returns: error string @@ -160,12 +157,12 @@ class Kibana(LoggingCheck): ), ) - kibana_url, error = self._get_kibana_url(task_vars) + kibana_url, error = self._get_kibana_url() if not kibana_url: return known_errors.get(error, error) # first, check that kibana is reachable from the master. - error = self._verify_url_internal(kibana_url, task_vars) + error = self._verify_url_internal(kibana_url) if error: if 'urlopen error [Errno 111] Connection refused' in error: error = ( @@ -190,7 +187,7 @@ class Kibana(LoggingCheck): # in production we would like the kibana route to work from outside the # cluster too; but that may not be the case, so allow disabling just this part. - if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True): + if not self.get_var("openshift_check_efk_kibana_external", default=True): return None error = self._verify_url_external(kibana_url) if error: @@ -221,9 +218,9 @@ class Kibana(LoggingCheck): return error return None - def _exec_oc(self, cmd_str, extra_args, task_vars): - return super(Kibana, self).exec_oc(self.execute_module, - self.logging_namespace, - cmd_str, - extra_args, - task_vars) + def _exec_oc(self, cmd_str, extra_args): + return super(Kibana, self).exec_oc( + self.logging_namespace, + cmd_str, + extra_args, + ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 6e951e82c..a48e1c728 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -5,39 +5,36 @@ Util functions for performing checks on an Elasticsearch, Fluentd, and Kibana st import json import os -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException class LoggingCheck(OpenShiftCheck): - """Base class for logging component checks""" + """Base class for OpenShift aggregated logging component checks""" name = "logging" + logging_namespace = "logging" - @classmethod - def is_active(cls, task_vars): - return super(LoggingCheck, cls).is_active(task_vars) and cls.is_first_master(task_vars) + def is_active(self): + logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False) + return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master() - @staticmethod - def is_first_master(task_vars): - """Run only on first master and only when logging is configured. Returns: bool""" - logging_deployed = get_var(task_vars, "openshift_hosted_logging_deploy", default=True) + def is_first_master(self): + """Determine if running on first master. Returns: bool""" # Note: It would be nice to use membership in oo_first_master group, however for now it # seems best to avoid requiring that setup and just check this is the first master. - hostname = get_var(task_vars, "ansible_ssh_host") or [None] - masters = get_var(task_vars, "groups", "masters", default=None) or [None] - return logging_deployed and masters[0] == hostname + hostname = self.get_var("ansible_ssh_host") or [None] + masters = self.get_var("groups", "masters", default=None) or [None] + return masters[0] == hostname - def run(self, tmp, task_vars): + def run(self): pass - def get_pods_for_component(self, execute_module, namespace, logging_component, task_vars): + def get_pods_for_component(self, namespace, logging_component): """Get all pods for a given component. Returns: list of pods for component, error string""" pod_output = self.exec_oc( - execute_module, namespace, "get pods -l component={} -o json".format(logging_component), [], - task_vars ) try: pods = json.loads(pod_output) @@ -45,7 +42,7 @@ class LoggingCheck(OpenShiftCheck): raise ValueError() except ValueError: # successful run but non-parsing data generally means there were no pods in the namespace - return None, 'There are no pods in the {} namespace. Is logging deployed?'.format(namespace) + return None, 'No pods were found for the "{}" logging component.'.format(logging_component) return pods['items'], None @@ -63,14 +60,13 @@ class LoggingCheck(OpenShiftCheck): ) ] - @staticmethod - def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None): + def exec_oc(self, namespace="logging", cmd_str="", extra_args=None): """ Execute an 'oc' command in the remote host. Returns: output of command and namespace, or raises OpenShiftCheckException on error """ - config_base = get_var(task_vars, "openshift", "common", "config_base") + config_base = self.get_var("openshift", "common", "config_base") args = { "namespace": namespace, "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), @@ -78,7 +74,7 @@ class LoggingCheck(OpenShiftCheck): "extra_args": list(extra_args) if extra_args else [], } - result = execute_module("ocutil", args, None, task_vars) + result = self.execute_module("ocutil", args) if result.get("failed"): msg = ( 'Unexpected error using `oc` to validate the logging stack components.\n' diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py new file mode 100644 index 000000000..b24e88e05 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py @@ -0,0 +1,130 @@ +""" +Check for ensuring logs from pods can be queried in a reasonable amount of time. +""" + +import json +import time + +from uuid import uuid4 + +from openshift_checks import OpenShiftCheckException +from openshift_checks.logging.logging import LoggingCheck + + +ES_CMD_TIMEOUT_SECONDS = 30 + + +class LoggingIndexTime(LoggingCheck): + """Check that pod logs are aggregated and indexed in ElasticSearch within a reasonable amount of time.""" + name = "logging_index_time" + tags = ["health", "logging"] + + logging_namespace = "logging" + + def run(self): + """Add log entry by making unique request to Kibana. Check for unique entry in the ElasticSearch pod logs.""" + try: + log_index_timeout = int( + self.get_var("openshift_check_logging_index_timeout_seconds", default=ES_CMD_TIMEOUT_SECONDS) + ) + except ValueError: + return { + "failed": True, + "msg": ('Invalid value provided for "openshift_check_logging_index_timeout_seconds". ' + 'Value must be an integer representing an amount in seconds.'), + } + + running_component_pods = dict() + + # get all component pods + self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace) + for component, name in (['kibana', 'Kibana'], ['es', 'Elasticsearch']): + pods, error = self.get_pods_for_component(self.logging_namespace, component) + + if error: + msg = 'Unable to retrieve pods for the {} logging component: {}' + return {"failed": True, "changed": False, "msg": msg.format(name, error)} + + running_pods = self.running_pods(pods) + + if not running_pods: + msg = ('No {} pods in the "Running" state were found.' + 'At least one pod is required in order to perform this check.') + return {"failed": True, "changed": False, "msg": msg.format(name)} + + running_component_pods[component] = running_pods + + uuid = self.curl_kibana_with_uuid(running_component_pods["kibana"][0]) + self.wait_until_cmd_or_err(running_component_pods["es"][0], uuid, log_index_timeout) + return {} + + def wait_until_cmd_or_err(self, es_pod, uuid, timeout_secs): + """Retry an Elasticsearch query every second until query success, or a defined + length of time has passed.""" + deadline = time.time() + timeout_secs + interval = 1 + while not self.query_es_from_es(es_pod, uuid): + if time.time() + interval > deadline: + msg = "expecting match in Elasticsearch for message with uuid {}, but no matches were found after {}s." + raise OpenShiftCheckException(msg.format(uuid, timeout_secs)) + time.sleep(interval) + + def curl_kibana_with_uuid(self, kibana_pod): + """curl Kibana with a unique uuid.""" + uuid = self.generate_uuid() + pod_name = kibana_pod["metadata"]["name"] + exec_cmd = "exec {pod_name} -c kibana -- curl --max-time 30 -s http://localhost:5601/{uuid}" + exec_cmd = exec_cmd.format(pod_name=pod_name, uuid=uuid) + + error_str = self.exec_oc(self.logging_namespace, exec_cmd, []) + + try: + error_code = json.loads(error_str)["statusCode"] + except KeyError: + msg = ('invalid response returned from Kibana request (Missing "statusCode" key):\n' + 'Command: {}\nResponse: {}').format(exec_cmd, error_str) + raise OpenShiftCheckException(msg) + except ValueError: + msg = ('invalid response returned from Kibana request (Non-JSON output):\n' + 'Command: {}\nResponse: {}').format(exec_cmd, error_str) + raise OpenShiftCheckException(msg) + + if error_code != 404: + msg = 'invalid error code returned from Kibana request. Expecting error code "404", but got "{}" instead.' + raise OpenShiftCheckException(msg.format(error_code)) + + return uuid + + def query_es_from_es(self, es_pod, uuid): + """curl the Elasticsearch pod and look for a unique uuid in its logs.""" + pod_name = es_pod["metadata"]["name"] + exec_cmd = ( + "exec {pod_name} -- curl --max-time 30 -s -f " + "--cacert /etc/elasticsearch/secret/admin-ca " + "--cert /etc/elasticsearch/secret/admin-cert " + "--key /etc/elasticsearch/secret/admin-key " + "https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}" + ) + exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace, uuid=uuid) + result = self.exec_oc(self.logging_namespace, exec_cmd, []) + + try: + count = json.loads(result)["count"] + except KeyError: + msg = 'invalid response from Elasticsearch query:\n"{}"\nMissing "count" key:\n{}' + raise OpenShiftCheckException(msg.format(exec_cmd, result)) + except ValueError: + msg = 'invalid response from Elasticsearch query:\n"{}"\nNon-JSON output:\n{}' + raise OpenShiftCheckException(msg.format(exec_cmd, result)) + + return count + + @staticmethod + def running_pods(pods): + """Filter pods that are running.""" + return [pod for pod in pods if pod['status']['phase'] == 'Running'] + + @staticmethod + def generate_uuid(): + """Wrap uuid generator. Allows for testing with expected values.""" + return str(uuid4()) diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py index f4e31065f..765ba072d 100644 --- a/roles/openshift_health_checker/openshift_checks/memory_availability.py +++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py @@ -1,5 +1,5 @@ -# pylint: disable=missing-docstring -from openshift_checks import OpenShiftCheck, get_var +"""Check that recommended memory is available.""" +from openshift_checks import OpenShiftCheck MIB = 2**20 GIB = 2**30 @@ -21,19 +21,18 @@ class MemoryAvailability(OpenShiftCheck): # https://access.redhat.com/solutions/3006511 physical RAM is partly reserved from memtotal memtotal_adjustment = 1 * GIB - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts that do not have recommended memory requirements.""" - group_names = get_var(task_vars, "group_names", default=[]) - has_memory_recommendation = bool(set(group_names).intersection(cls.recommended_memory_bytes)) - return super(MemoryAvailability, cls).is_active(task_vars) and has_memory_recommendation + group_names = self.get_var("group_names", default=[]) + has_memory_recommendation = bool(set(group_names).intersection(self.recommended_memory_bytes)) + return super(MemoryAvailability, self).is_active() and has_memory_recommendation - def run(self, tmp, task_vars): - group_names = get_var(task_vars, "group_names") - total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * MIB + def run(self): + group_names = self.get_var("group_names") + total_memory_bytes = self.get_var("ansible_memtotal_mb") * MIB recommended_min = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) - configured_min = float(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * GIB + configured_min = float(self.get_var("openshift_check_min_host_memory_gb", default=0)) * GIB min_memory_bytes = configured_min or recommended_min if total_memory_bytes + self.memtotal_adjustment < min_memory_bytes: diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 2cb2e21aa..3b2c64e6a 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -2,19 +2,16 @@ Mixin classes meant to be used with subclasses of OpenShiftCheck. """ -from openshift_checks import get_var - class NotContainerizedMixin(object): """Mixin for checks that are only active when not in containerized mode.""" # permanent # pylint: disable=too-few-public-methods # Reason: The mixin is not intended to stand on its own as a class. - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Only run on non-containerized hosts.""" - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized + is_containerized = self.get_var("openshift", "common", "is_containerized") + return super(NotContainerizedMixin, self).is_active() and not is_containerized class DockerHostMixin(object): @@ -22,28 +19,26 @@ class DockerHostMixin(object): dependencies = [] - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Only run on hosts that depend on Docker.""" - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - is_node = "nodes" in get_var(task_vars, "group_names", default=[]) - return super(DockerHostMixin, cls).is_active(task_vars) and (is_containerized or is_node) + is_containerized = self.get_var("openshift", "common", "is_containerized") + is_node = "nodes" in self.get_var("group_names", default=[]) + return super(DockerHostMixin, self).is_active() and (is_containerized or is_node) - def ensure_dependencies(self, task_vars): + def ensure_dependencies(self): """ Ensure that docker-related packages exist, but not on atomic hosts (which would not be able to install but should already have them). Returns: msg, failed, changed """ - if get_var(task_vars, "openshift", "common", "is_atomic"): + if self.get_var("openshift", "common", "is_atomic"): return "", False, False # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: result = self.execute_module( - get_var(task_vars, "ansible_pkg_mgr", default="yum"), + self.get_var("ansible_pkg_mgr", default="yum"), {"name": self.dependencies, "state": "present"}, - task_vars=task_vars, ) msg = result.get("msg", "") if result.get("failed"): diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 2dd045f1f..cd6ebd493 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -3,7 +3,7 @@ Ansible module for determining if an installed version of Open vSwitch is incomp currently installed version of OpenShift. """ -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException from openshift_checks.mixins import NotContainerizedMixin @@ -27,27 +27,26 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): "1": "3", } - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts that do not have package requirements.""" - group_names = get_var(task_vars, "group_names", default=[]) + group_names = self.get_var("group_names", default=[]) master_or_node = 'masters' in group_names or 'nodes' in group_names - return super(OvsVersion, cls).is_active(task_vars) and master_or_node + return super(OvsVersion, self).is_active() and master_or_node - def run(self, tmp, task_vars): + def run(self): args = { "package_list": [ { "name": "openvswitch", - "version": self.get_required_ovs_version(task_vars), + "version": self.get_required_ovs_version(), }, ], } - return self.execute_module("rpm_version", args, task_vars=task_vars) + return self.execute_module("rpm_version", args) - def get_required_ovs_version(self, task_vars): + def get_required_ovs_version(self): """Return the correct Open vSwitch version for the current OpenShift version""" - openshift_version = self._get_openshift_version(task_vars) + openshift_version = self._get_openshift_version() if float(openshift_version) < 3.5: return self.openshift_to_ovs_version["3.4"] @@ -59,8 +58,8 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" raise OpenShiftCheckException(msg.format(openshift_version)) - def _get_openshift_version(self, task_vars): - openshift_version = get_var(task_vars, "openshift_image_tag") + def _get_openshift_version(self): + openshift_version = self.get_var("openshift_image_tag") if openshift_version and openshift_version[0] == 'v': openshift_version = openshift_version[1:] diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index 0dd2b1286..a86180b00 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -1,5 +1,6 @@ -# pylint: disable=missing-docstring -from openshift_checks import OpenShiftCheck, get_var +"""Check that required RPM packages are available.""" + +from openshift_checks import OpenShiftCheck from openshift_checks.mixins import NotContainerizedMixin @@ -9,13 +10,13 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): name = "package_availability" tags = ["preflight"] - @classmethod - def is_active(cls, task_vars): - return super(PackageAvailability, cls).is_active(task_vars) and task_vars["ansible_pkg_mgr"] == "yum" + def is_active(self): + """Run only when yum is the package manager as the code is specific to it.""" + return super(PackageAvailability, self).is_active() and self.get_var("ansible_pkg_mgr") == "yum" - def run(self, tmp, task_vars): - rpm_prefix = get_var(task_vars, "openshift", "common", "service_type") - group_names = get_var(task_vars, "group_names", default=[]) + def run(self): + rpm_prefix = self.get_var("openshift", "common", "service_type") + group_names = self.get_var("group_names", default=[]) packages = set() @@ -25,10 +26,11 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) + return self.execute_module("check_yum_update", args) @staticmethod def master_packages(rpm_prefix): + """Return a list of RPMs that we expect a master install to have available.""" return [ "{rpm_prefix}".format(rpm_prefix=rpm_prefix), "{rpm_prefix}-clients".format(rpm_prefix=rpm_prefix), @@ -44,6 +46,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): @staticmethod def node_packages(rpm_prefix): + """Return a list of RPMs that we expect a node install to have available.""" return [ "{rpm_prefix}".format(rpm_prefix=rpm_prefix), "{rpm_prefix}-node".format(rpm_prefix=rpm_prefix), diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index f432380c6..1e9aecbe0 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -1,14 +1,14 @@ -# pylint: disable=missing-docstring +"""Check that a yum update would not run into conflicts with available packages.""" from openshift_checks import OpenShiftCheck from openshift_checks.mixins import NotContainerizedMixin class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): - """Check that there are no conflicts in RPM packages.""" + """Check that a yum update would not run into conflicts with available packages.""" name = "package_update" tags = ["preflight"] - def run(self, tmp, task_vars): + def run(self): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) + return self.execute_module("check_yum_update", args) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 204752bd0..020786804 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -1,5 +1,5 @@ -# pylint: disable=missing-docstring -from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +"""Check that available RPM packages match the required versions.""" +from openshift_checks import OpenShiftCheck, OpenShiftCheckException from openshift_checks.mixins import NotContainerizedMixin @@ -28,29 +28,28 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): "1": "3", } - @classmethod - def is_active(cls, task_vars): + def is_active(self): """Skip hosts that do not have package requirements.""" - group_names = get_var(task_vars, "group_names", default=[]) + group_names = self.get_var("group_names", default=[]) master_or_node = 'masters' in group_names or 'nodes' in group_names - return super(PackageVersion, cls).is_active(task_vars) and master_or_node + return super(PackageVersion, self).is_active() and master_or_node - def run(self, tmp, task_vars): - rpm_prefix = get_var(task_vars, "openshift", "common", "service_type") - openshift_release = get_var(task_vars, "openshift_release", default='') - deployment_type = get_var(task_vars, "openshift_deployment_type") + def run(self): + rpm_prefix = self.get_var("openshift", "common", "service_type") + openshift_release = self.get_var("openshift_release", default='') + deployment_type = self.get_var("openshift_deployment_type") check_multi_minor_release = deployment_type in ['openshift-enterprise'] args = { "package_list": [ { "name": "openvswitch", - "version": self.get_required_ovs_version(task_vars), + "version": self.get_required_ovs_version(), "check_multi": False, }, { "name": "docker", - "version": self.get_required_docker_version(task_vars), + "version": self.get_required_docker_version(), "check_multi": False, }, { @@ -71,13 +70,13 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) + return self.execute_module("aos_version", args) - def get_required_ovs_version(self, task_vars): + def get_required_ovs_version(self): """Return the correct Open vSwitch version for the current OpenShift version. If the current OpenShift version is >= 3.5, ensure Open vSwitch version 2.6, Else ensure Open vSwitch version 2.4""" - openshift_version = self.get_openshift_version(task_vars) + openshift_version = self.get_openshift_version() if float(openshift_version) < 3.5: return self.openshift_to_ovs_version["3.4"] @@ -89,12 +88,12 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" raise OpenShiftCheckException(msg.format(openshift_version)) - def get_required_docker_version(self, task_vars): + def get_required_docker_version(self): """Return the correct Docker version for the current OpenShift version. If the OpenShift version is 3.1, ensure Docker version 1.8. If the OpenShift version is 3.2 or 3.3, ensure Docker version 1.10. If the current OpenShift version is >= 3.4, ensure Docker version 1.12.""" - openshift_version = self.get_openshift_version(task_vars) + openshift_version = self.get_openshift_version() if float(openshift_version) >= 3.4: return self.openshift_to_docker_version["3.4"] @@ -106,14 +105,16 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): msg = "There is no recommended version of Docker for the current version of OpenShift: {}" raise OpenShiftCheckException(msg.format(openshift_version)) - def get_openshift_version(self, task_vars): - openshift_version = get_var(task_vars, "openshift_image_tag") + def get_openshift_version(self): + """Return received image tag as a normalized X.Y minor version string.""" + openshift_version = self.get_var("openshift_image_tag") if openshift_version and openshift_version[0] == 'v': openshift_version = openshift_version[1:] return self.parse_version(openshift_version) def parse_version(self, version): + """Return a normalized X.Y minor version string.""" components = version.split(".") if not components or len(components) < 2: msg = "An invalid version of OpenShift was found for this host: {}" diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 9383b233c..2d068be3d 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -15,14 +15,13 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru name = _name tags = _tags or [] - def __init__(self, execute_module=None): + def __init__(self, execute_module=None, task_vars=None, tmp=None): pass - @classmethod - def is_active(cls, task_vars): + def is_active(self): return is_active - def run(self, tmp, task_vars): + def run(self): if run_exception is not None: raise run_exception return run_return @@ -124,7 +123,7 @@ def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch): check_return_value = {'ok': 'test'} check_class = fake_check(run_return=check_return_value) - monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -138,7 +137,7 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch): check_return_value = {'ok': 'test', 'changed': True} check_class = fake_check(run_return=check_return_value) - monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -152,7 +151,7 @@ def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch): check_return_value = {'failed': True} check_class = fake_check(run_return=check_return_value) - monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -167,7 +166,7 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): exception_msg = 'fake check has an exception' run_exception = OpenShiftCheckException(exception_msg) check_class = fake_check(run_exception=run_exception) - monkeypatch.setattr(plugin, 'load_known_checks', lambda: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -179,7 +178,7 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch): - monkeypatch.setattr(plugin, 'load_known_checks', lambda: {}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {}) result = plugin.run(tmp=None, task_vars=task_vars) diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index 945b9eafc..e98d02c58 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -17,7 +17,7 @@ def test_is_active(group_names, is_active): task_vars = dict( group_names=group_names, ) - assert DiskAvailability.is_active(task_vars=task_vars) == is_active + assert DiskAvailability(None, task_vars).is_active() == is_active @pytest.mark.parametrize('ansible_mounts,extra_words', [ @@ -30,10 +30,9 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): group_names=['masters'], ansible_mounts=ansible_mounts, ) - check = DiskAvailability(execute_module=fake_execute_module) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + DiskAvailability(fake_execute_module, task_vars).run() for word in 'determine disk availability'.split() + extra_words: assert word in str(excinfo.value) @@ -93,8 +92,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ansible_mounts=ansible_mounts, ) - check = DiskAvailability(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = DiskAvailability(fake_execute_module, task_vars).run() assert not result.get('failed', False) @@ -168,8 +166,7 @@ def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible ansible_mounts=ansible_mounts, ) - check = DiskAvailability(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = DiskAvailability(fake_execute_module, task_vars).run() assert result['failed'] for word in 'below recommended'.split() + extra_words: diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 3b9e097fb..8d0a53df9 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -21,7 +21,7 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active openshift_deployment_type=deployment_type, group_names=group_names, ) - assert DockerImageAvailability.is_active(task_vars=task_vars) == expect_active + assert DockerImageAvailability(None, task_vars).is_active() == expect_active @pytest.mark.parametrize("is_containerized,is_atomic", [ @@ -31,7 +31,7 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, module_args, task_vars): + def execute_module(module_name, module_args, *_): if module_name == "yum": return {"changed": True} @@ -42,7 +42,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): 'images': [module_args['name']], } - result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( + result = DockerImageAvailability(execute_module, task_vars=dict( openshift=dict( common=dict( service_type='origin', @@ -54,7 +54,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): openshift_deployment_type='origin', openshift_image_tag='3.4', group_names=['nodes', 'masters'], - )) + )).run() assert not result.get('failed', False) @@ -64,12 +64,12 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, module_args, task_vars): + def execute_module(module_name, *_): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} - result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( + result = DockerImageAvailability(execute_module, task_vars=dict( openshift=dict( common=dict( service_type='origin', @@ -81,13 +81,13 @@ def test_all_images_available_remotely(available_locally): openshift_deployment_type='origin', openshift_image_tag='v3.4', group_names=['nodes', 'masters'], - )) + )).run() assert not result.get('failed', False) def test_all_images_unavailable(): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, *_): if module_name == "command": return { 'failed': True, @@ -97,8 +97,7 @@ def test_all_images_unavailable(): 'changed': False, } - check = DockerImageAvailability(execute_module=execute_module) - actual = check.run(tmp=None, task_vars=dict( + actual = DockerImageAvailability(execute_module, task_vars=dict( openshift=dict( common=dict( service_type='origin', @@ -110,7 +109,7 @@ def test_all_images_unavailable(): openshift_deployment_type="openshift-enterprise", openshift_image_tag='latest', group_names=['nodes', 'masters'], - )) + )).run() assert actual['failed'] assert "required Docker images are not available" in actual['msg'] @@ -127,7 +126,7 @@ def test_all_images_unavailable(): ), ]) def test_skopeo_update_failure(message, extra_words): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, *_): if module_name == "yum": return { "failed": True, @@ -137,7 +136,7 @@ def test_skopeo_update_failure(message, extra_words): return {'changed': False} - actual = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( + actual = DockerImageAvailability(execute_module, task_vars=dict( openshift=dict( common=dict( service_type='origin', @@ -149,7 +148,7 @@ def test_skopeo_update_failure(message, extra_words): openshift_deployment_type="openshift-enterprise", openshift_image_tag='', group_names=['nodes', 'masters'], - )) + )).run() assert actual["failed"] for word in extra_words: @@ -162,12 +161,12 @@ def test_skopeo_update_failure(message, extra_words): ("openshift-enterprise", []), ]) def test_registry_availability(deployment_type, registries): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, *_): return { 'changed': False, } - actual = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( + actual = DockerImageAvailability(execute_module, task_vars=dict( openshift=dict( common=dict( service_type='origin', @@ -179,7 +178,7 @@ def test_registry_availability(deployment_type, registries): openshift_deployment_type=deployment_type, openshift_image_tag='', group_names=['nodes', 'masters'], - )) + )).run() assert not actual.get("failed", False) @@ -258,7 +257,7 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex openshift_image_tag='vtest', ) - assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) + assert expected == DockerImageAvailability("DUMMY", task_vars).required_images() def test_containerized_etcd(): @@ -272,4 +271,4 @@ def test_containerized_etcd(): group_names=['etcd'], ) expected = set(['registry.access.redhat.com/rhel7/etcd']) - assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) + assert expected == DockerImageAvailability("DUMMY", task_vars).required_images() diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 99c529054..e0dccc062 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -4,12 +4,6 @@ from openshift_checks import OpenShiftCheckException from openshift_checks.docker_storage import DockerStorage -def dummy_check(execute_module=None): - def dummy_exec(self, status, task_vars): - raise Exception("dummy executor called") - return DockerStorage(execute_module=execute_module or dummy_exec) - - @pytest.mark.parametrize('is_containerized, group_names, is_active', [ (False, ["masters", "etcd"], False), (False, ["masters", "nodes"], True), @@ -20,7 +14,7 @@ def test_is_active(is_containerized, group_names, is_active): openshift=dict(common=dict(is_containerized=is_containerized)), group_names=group_names, ) - assert DockerStorage.is_active(task_vars=task_vars) == is_active + assert DockerStorage(None, task_vars).is_active() == is_active def non_atomic_task_vars(): @@ -99,17 +93,17 @@ def non_atomic_task_vars(): ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, module_args, tmp=None, task_vars=None): + def execute_module(module_name, *_): if module_name == "yum": return {} if module_name != "docker_info": raise ValueError("not expecting module " + module_name) return docker_info - check = dummy_check(execute_module=execute_module) - check.check_dm_usage = lambda status, task_vars: dict() # stub out for this test - check.check_overlay_usage = lambda info, task_vars: dict() # stub out for this test - result = check.run(tmp=None, task_vars=non_atomic_task_vars()) + check = DockerStorage(execute_module, non_atomic_task_vars()) + check.check_dm_usage = lambda status: dict() # stub out for this test + check.check_overlay_usage = lambda info: dict() # stub out for this test + result = check.run() if failed: assert result["failed"] @@ -168,9 +162,9 @@ not_enough_space = { ), ]) def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): - check = dummy_check() - check.get_vg_free = lambda pool, task_vars: vg_free - result = check.check_dm_usage(driver_status, task_vars) + check = DockerStorage(None, task_vars) + check.get_vg_free = lambda pool: vg_free + result = check.check_dm_usage(driver_status) result_success = not result.get("failed") assert result_success is success @@ -210,18 +204,18 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, module_args, tmp=None, task_vars=None): + def execute_module(module_name, *_): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns - check = dummy_check(execute_module=execute_module) + check = DockerStorage(execute_module) if raises: with pytest.raises(OpenShiftCheckException) as err: - check.get_vg_free(pool, {}) + check.get_vg_free(pool) assert raises in str(err.value) else: - ret = check.get_vg_free(pool, {}) + ret = check.get_vg_free(pool) assert ret == returns @@ -298,13 +292,13 @@ ansible_mounts_zero_size = [{ ), ]) def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg): - check = dummy_check() task_vars = non_atomic_task_vars() task_vars["ansible_mounts"] = ansible_mounts if threshold is not None: task_vars["max_overlay_usage_percent"] = threshold + check = DockerStorage(None, task_vars) docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay") - result = check.check_overlay_usage(docker_info, task_vars) + result = check.check_overlay_usage(docker_info) assert expect_fail == bool(result.get("failed")) for msg in expect_msg: diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py index b9d375d8c..9edfc17c7 100644 --- a/roles/openshift_health_checker/test/elasticsearch_test.py +++ b/roles/openshift_health_checker/test/elasticsearch_test.py @@ -6,9 +6,9 @@ from openshift_checks.logging.elasticsearch import Elasticsearch task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) -def canned_elasticsearch(exec_oc=None): +def canned_elasticsearch(task_vars=None, exec_oc=None): """Create an Elasticsearch check object with canned exec_oc method""" - check = Elasticsearch("dummy") # fails if a module is actually invoked + check = Elasticsearch("dummy", task_vars or {}) # fails if a module is actually invoked if exec_oc: check._exec_oc = exec_oc return check @@ -50,10 +50,10 @@ split_es_pod = { def test_check_elasticsearch(): - assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([], {}) + assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([]) # canned oc responses to match so all the checks pass - def _exec_oc(cmd, args, task_vars): + def _exec_oc(cmd, args): if '_cat/master' in cmd: return 'name logging-es' elif '/_nodes' in cmd: @@ -65,7 +65,7 @@ def test_check_elasticsearch(): else: raise Exception(cmd) - assert not canned_elasticsearch(_exec_oc).check_elasticsearch([plain_es_pod], {}) + assert not canned_elasticsearch({}, _exec_oc).check_elasticsearch([plain_es_pod]) def pods_by_name(pods): @@ -88,9 +88,9 @@ def pods_by_name(pods): ]) def test_check_elasticsearch_masters(pods, expect_error): test_pods = list(pods) - check = canned_elasticsearch(lambda cmd, args, task_vars: test_pods.pop(0)['_test_master_name_str']) + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: test_pods.pop(0)['_test_master_name_str']) - errors = check._check_elasticsearch_masters(pods_by_name(pods), task_vars_config_base) + errors = check._check_elasticsearch_masters(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -124,9 +124,9 @@ es_node_list = { ), ]) def test_check_elasticsearch_node_list(pods, node_list, expect_error): - check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(node_list)) + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list)) - errors = check._check_elasticsearch_node_list(pods_by_name(pods), task_vars_config_base) + errors = check._check_elasticsearch_node_list(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -149,9 +149,9 @@ def test_check_elasticsearch_node_list(pods, node_list, expect_error): ]) def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): test_health_data = list(health_data) - check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(test_health_data.pop(0))) + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(test_health_data.pop(0))) - errors = check._check_es_cluster_health(pods_by_name(pods), task_vars_config_base) + errors = check._check_es_cluster_health(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -174,7 +174,7 @@ def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): ), ]) def test_check_elasticsearch_diskspace(disk_data, expect_error): - check = canned_elasticsearch(lambda cmd, args, task_vars: disk_data) + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: disk_data) - errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]), task_vars_config_base) + errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod])) assert_error(''.join(errors), expect_error) diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py index df9d52d41..e3d6706fa 100644 --- a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py +++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py @@ -51,10 +51,10 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words): task_vars = dict( ansible_mounts=ansible_mounts, ) - check = EtcdImageDataSize(execute_module=fake_execute_module) + check = EtcdImageDataSize(fake_execute_module, task_vars) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + check.run() for word in 'determine valid etcd mountpath'.split() + extra_words: assert word in str(excinfo.value) @@ -111,14 +111,14 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words): ) ]) def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size_limit, should_fail, extra_words): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, *_): if module_name != "etcdkeysize": return { "changed": False, } client = fake_etcd_client(tree) - s, limit_exceeded = check_etcd_key_size(client, tree["key"], args["size_limit_bytes"]) + s, limit_exceeded = check_etcd_key_size(client, tree["key"], module_args["size_limit_bytes"]) return {"size_limit_exceeded": limit_exceeded} @@ -133,7 +133,7 @@ def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size if size_limit is None: task_vars.pop("etcd_max_image_data_size_bytes") - check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + check = EtcdImageDataSize(execute_module, task_vars).run() if should_fail: assert check["failed"] @@ -267,14 +267,14 @@ def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size ), ]) def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_path, expected_size, extra_words): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, *_): if module_name != "etcdkeysize": return { "changed": False, } client = fake_etcd_client(tree) - size, limit_exceeded = check_etcd_key_size(client, root_path, args["size_limit_bytes"]) + size, limit_exceeded = check_etcd_key_size(client, root_path, module_args["size_limit_bytes"]) assert size == expected_size return { @@ -289,12 +289,12 @@ def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_ ) ) - check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + check = EtcdImageDataSize(execute_module, task_vars).run() assert not check.get("failed", False) def test_etcdkeysize_module_failure(): - def execute_module(module_name, tmp=None, task_vars=None): + def execute_module(module_name, *_): if module_name != "etcdkeysize": return { "changed": False, @@ -317,7 +317,7 @@ def test_etcdkeysize_module_failure(): ) ) - check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars) + check = EtcdImageDataSize(execute_module, task_vars).run() assert check["failed"] for word in "Failed to retrieve stats": diff --git a/roles/openshift_health_checker/test/etcd_traffic_test.py b/roles/openshift_health_checker/test/etcd_traffic_test.py index 287175e29..f4316c423 100644 --- a/roles/openshift_health_checker/test/etcd_traffic_test.py +++ b/roles/openshift_health_checker/test/etcd_traffic_test.py @@ -21,7 +21,7 @@ def test_is_active(group_names, version, is_active): common=dict(short_version=version), ), ) - assert EtcdTraffic.is_active(task_vars=task_vars) == is_active + assert EtcdTraffic(task_vars=task_vars).is_active() == is_active @pytest.mark.parametrize('group_names,matched,failed,extra_words', [ @@ -30,7 +30,7 @@ def test_is_active(group_names, version, is_active): (["etcd"], False, False, []), ]) def test_log_matches_high_traffic_msg(group_names, matched, failed, extra_words): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, *_): return { "matched": matched, "failed": failed, @@ -43,8 +43,7 @@ def test_log_matches_high_traffic_msg(group_names, matched, failed, extra_words) ) ) - check = EtcdTraffic(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = EtcdTraffic(execute_module, task_vars).run() for word in extra_words: assert word in result.get("msg", "") @@ -63,7 +62,7 @@ def test_systemd_unit_matches_deployment_type(is_containerized, expected_unit_va ) ) - def execute_module(module_name, args, task_vars): + def execute_module(module_name, args, *_): assert module_name == "search_journalctl" matchers = args["log_matchers"] @@ -72,9 +71,4 @@ def test_systemd_unit_matches_deployment_type(is_containerized, expected_unit_va return {"failed": False} - check = EtcdTraffic(execute_module=execute_module) - check.run(tmp=None, task_vars=task_vars) - - -def fake_execute_module(*args): - raise AssertionError('this function should not be called') + EtcdTraffic(execute_module, task_vars).run() diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py index 917045526..0b255136e 100644 --- a/roles/openshift_health_checker/test/etcd_volume_test.py +++ b/roles/openshift_health_checker/test/etcd_volume_test.py @@ -11,10 +11,9 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): task_vars = dict( ansible_mounts=ansible_mounts, ) - check = EtcdVolume(execute_module=fake_execute_module) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + EtcdVolume(fake_execute_module, task_vars).run() for word in 'Unable to find etcd storage mount point'.split() + extra_words: assert word in str(excinfo.value) @@ -76,8 +75,7 @@ def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts): if task_vars["etcd_device_usage_threshold_percent"] is None: task_vars.pop("etcd_device_usage_threshold_percent") - check = EtcdVolume(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = EtcdVolume(fake_execute_module, task_vars).run() assert not result.get('failed', False) @@ -137,8 +135,7 @@ def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, if task_vars["etcd_device_usage_threshold_percent"] is None: task_vars.pop("etcd_device_usage_threshold_percent") - check = EtcdVolume(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = EtcdVolume(fake_execute_module, task_vars).run() assert result['failed'] for word in extra_words: diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py index d151c0b19..9cee57868 100644 --- a/roles/openshift_health_checker/test/fluentd_test.py +++ b/roles/openshift_health_checker/test/fluentd_test.py @@ -103,7 +103,7 @@ fluentd_node3_unlabeled = { ), ]) def test_get_fluentd_pods(pods, nodes, expect_error): - check = canned_fluentd(lambda cmd, args, task_vars: json.dumps(dict(items=nodes))) + check = canned_fluentd(exec_oc=lambda cmd, args: json.dumps(dict(items=nodes))) - error = check.check_fluentd(pods, {}) + error = check.check_fluentd(pods) assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py index 40a5d19d8..3a880d300 100644 --- a/roles/openshift_health_checker/test/kibana_test.py +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -13,7 +13,7 @@ from openshift_checks.logging.kibana import Kibana def canned_kibana(exec_oc=None): """Create a Kibana check object with canned exec_oc method""" - check = Kibana("dummy") # fails if a module is actually invoked + check = Kibana() # fails if a module is actually invoked if exec_oc: check._exec_oc = exec_oc return check @@ -137,9 +137,9 @@ def test_check_kibana(pods, expect_error): ), ]) def test_get_kibana_url(route, expect_url, expect_error): - check = canned_kibana(lambda cmd, args, task_vars: json.dumps(route) if route else "") + check = canned_kibana(exec_oc=lambda cmd, args: json.dumps(route) if route else "") - url, error = check._get_kibana_url({}) + url, error = check._get_kibana_url() if expect_url: assert url == expect_url else: @@ -169,10 +169,10 @@ def test_get_kibana_url(route, expect_url, expect_error): ), ]) def test_verify_url_internal_failure(exec_result, expect): - check = Kibana(execute_module=lambda module_name, args, tmp, task_vars: dict(failed=True, msg=exec_result)) - check._get_kibana_url = lambda task_vars: ('url', None) + check = Kibana(execute_module=lambda *_: dict(failed=True, msg=exec_result)) + check._get_kibana_url = lambda: ('url', None) - error = check._check_kibana_route({}) + error = check._check_kibana_route() assert_error(error, expect) @@ -211,8 +211,8 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch): monkeypatch.setattr(urllib2, 'urlopen', urlopen) check = canned_kibana() - check._get_kibana_url = lambda task_vars: ('url', None) - check._verify_url_internal = lambda url, task_vars: None + check._get_kibana_url = lambda: ('url', None) + check._verify_url_internal = lambda url: None - error = check._check_kibana_route({}) + error = check._check_kibana_route() assert_error(error, expect) diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py index 128b76b12..6f1697ee6 100644 --- a/roles/openshift_health_checker/test/logging_check_test.py +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -11,7 +11,7 @@ logging_namespace = "logging" def canned_loggingcheck(exec_oc=None): """Create a LoggingCheck object with canned exec_oc method""" - check = LoggingCheck("dummy") # fails if a module is actually invoked + check = LoggingCheck() # fails if a module is actually invoked check.logging_namespace = 'logging' if exec_oc: check.exec_oc = exec_oc @@ -90,15 +90,15 @@ plain_curator_pod = { ("Permission denied", "Unexpected error using `oc`"), ]) def test_oc_failure(problem, expect): - def execute_module(module_name, args, tmp, task_vars): + def execute_module(module_name, *_): if module_name == "ocutil": return dict(failed=True, result=problem) return dict(changed=False) - check = LoggingCheck({}) + check = LoggingCheck(execute_module, task_vars_config_base) with pytest.raises(OpenShiftCheckException) as excinfo: - check.exec_oc(execute_module, logging_namespace, 'get foo', [], task_vars=task_vars_config_base) + check.exec_oc(logging_namespace, 'get foo', []) assert expect in str(excinfo) @@ -121,14 +121,14 @@ def test_is_active(groups, logging_deployed, is_active): openshift_hosted_logging_deploy=logging_deployed, ) - assert LoggingCheck.is_active(task_vars=task_vars) == is_active + assert LoggingCheck(None, task_vars).is_active() == is_active @pytest.mark.parametrize('pod_output, expect_pods, expect_error', [ ( 'No resources found.', None, - 'There are no pods in the logging namespace', + 'No pods were found for the "es"', ), ( json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}), @@ -137,12 +137,10 @@ def test_is_active(groups, logging_deployed, is_active): ), ]) def test_get_pods_for_component(pod_output, expect_pods, expect_error): - check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: pod_output) + check = canned_loggingcheck(lambda namespace, cmd, args: pod_output) pods, error = check.get_pods_for_component( - lambda name, args, task_vars: {}, logging_namespace, "es", - {} ) assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py new file mode 100644 index 000000000..178d7cd84 --- /dev/null +++ b/roles/openshift_health_checker/test/logging_index_time_test.py @@ -0,0 +1,170 @@ +import json + +import pytest + +from openshift_checks.logging.logging_index_time import LoggingIndexTime, OpenShiftCheckException + + +SAMPLE_UUID = "unique-test-uuid" + + +def canned_loggingindextime(exec_oc=None): + """Create a check object with a canned exec_oc method""" + check = LoggingIndexTime() # fails if a module is actually invoked + if exec_oc: + check.exec_oc = exec_oc + return check + + +plain_running_elasticsearch_pod = { + "metadata": { + "labels": {"component": "es", "deploymentconfig": "logging-es-data-master"}, + "name": "logging-es-data-master-1", + }, + "status": { + "containerStatuses": [{"ready": True}, {"ready": True}], + "phase": "Running", + } +} +plain_running_kibana_pod = { + "metadata": { + "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, + "name": "logging-kibana-1", + }, + "status": { + "containerStatuses": [{"ready": True}, {"ready": True}], + "phase": "Running", + } +} +not_running_kibana_pod = { + "metadata": { + "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, + "name": "logging-kibana-2", + }, + "status": { + "containerStatuses": [{"ready": True}, {"ready": False}], + "conditions": [{"status": "True", "type": "Ready"}], + "phase": "pending", + } +} + + +@pytest.mark.parametrize('pods, expect_pods', [ + ( + [not_running_kibana_pod], + [], + ), + ( + [plain_running_kibana_pod], + [plain_running_kibana_pod], + ), + ( + [], + [], + ) +]) +def test_check_running_pods(pods, expect_pods): + check = canned_loggingindextime() + pods = check.running_pods(pods) + assert pods == expect_pods + + +@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [ + ( + 'valid count in response', + { + "count": 1, + }, + SAMPLE_UUID, + 0.001, + [], + ), +], ids=lambda argval: argval[0]) +def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout, extra_words): + check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout) + + +@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [ + ( + 'invalid json response', + { + "invalid_field": 1, + }, + SAMPLE_UUID, + 0.001, + ["invalid response", "Elasticsearch"], + ), + ( + 'empty response', + {}, + SAMPLE_UUID, + 0.001, + ["invalid response", "Elasticsearch"], + ), + ( + 'valid response but invalid match count', + { + "count": 0, + }, + SAMPLE_UUID, + 0.005, + ["expecting match", SAMPLE_UUID, "0.005s"], + ) +], ids=lambda argval: argval[0]) +def test_wait_until_cmd_or_err(name, json_response, uuid, timeout, extra_words): + check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + with pytest.raises(OpenShiftCheckException) as error: + check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout) + + for word in extra_words: + assert word in str(error) + + +@pytest.mark.parametrize('name, json_response, uuid, extra_words', [ + ( + 'correct response code, found unique id is returned', + { + "statusCode": 404, + }, + "sample unique id", + ["sample unique id"], + ), +], ids=lambda argval: argval[0]) +def test_curl_kibana_with_uuid(name, json_response, uuid, extra_words): + check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check.generate_uuid = lambda: uuid + + result = check.curl_kibana_with_uuid(plain_running_kibana_pod) + + for word in extra_words: + assert word in result + + +@pytest.mark.parametrize('name, json_response, uuid, extra_words', [ + ( + 'invalid json response', + { + "invalid_field": "invalid", + }, + SAMPLE_UUID, + ["invalid response returned", 'Missing "statusCode" key'], + ), + ( + 'wrong error code in response', + { + "statusCode": 500, + }, + SAMPLE_UUID, + ["Expecting error code", "500"], + ), +], ids=lambda argval: argval[0]) +def test_failed_curl_kibana_with_uuid(name, json_response, uuid, extra_words): + check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check.generate_uuid = lambda: uuid + + with pytest.raises(OpenShiftCheckException) as error: + check.curl_kibana_with_uuid(plain_running_kibana_pod) + + for word in extra_words: + assert word in str(error) diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py index 4fbaea0a9..aee2f0416 100644 --- a/roles/openshift_health_checker/test/memory_availability_test.py +++ b/roles/openshift_health_checker/test/memory_availability_test.py @@ -17,7 +17,7 @@ def test_is_active(group_names, is_active): task_vars = dict( group_names=group_names, ) - assert MemoryAvailability.is_active(task_vars=task_vars) == is_active + assert MemoryAvailability(None, task_vars).is_active() == is_active @pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb', [ @@ -59,8 +59,7 @@ def test_succeeds_with_recommended_memory(group_names, configured_min, ansible_m ansible_memtotal_mb=ansible_memtotal_mb, ) - check = MemoryAvailability(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = MemoryAvailability(fake_execute_module, task_vars).run() assert not result.get('failed', False) @@ -117,8 +116,7 @@ def test_fails_with_insufficient_memory(group_names, configured_min, ansible_mem ansible_memtotal_mb=ansible_memtotal_mb, ) - check = MemoryAvailability(execute_module=fake_execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = MemoryAvailability(fake_execute_module, task_vars).run() assert result.get('failed', False) for word in 'below recommended'.split() + extra_words: diff --git a/roles/openshift_health_checker/test/mixins_test.py b/roles/openshift_health_checker/test/mixins_test.py index 2d83e207d..b1a41ca3c 100644 --- a/roles/openshift_health_checker/test/mixins_test.py +++ b/roles/openshift_health_checker/test/mixins_test.py @@ -14,10 +14,10 @@ class NotContainerizedCheck(NotContainerizedMixin, OpenShiftCheck): (dict(openshift=dict(common=dict(is_containerized=True))), False), ]) def test_is_active(task_vars, expected): - assert NotContainerizedCheck.is_active(task_vars) == expected + assert NotContainerizedCheck(None, task_vars).is_active() == expected def test_is_active_missing_task_vars(): with pytest.raises(OpenShiftCheckException) as excinfo: - NotContainerizedCheck.is_active(task_vars={}) + NotContainerizedCheck().is_active() assert 'is_containerized' in str(excinfo.value) diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py index e3153979c..43aa875f4 100644 --- a/roles/openshift_health_checker/test/openshift_check_test.py +++ b/roles/openshift_health_checker/test/openshift_check_test.py @@ -1,7 +1,7 @@ import pytest from openshift_checks import OpenShiftCheck, OpenShiftCheckException -from openshift_checks import load_checks, get_var +from openshift_checks import load_checks # Fixtures @@ -28,34 +28,23 @@ def test_OpenShiftCheck_init(): name = "test_check" run = NotImplemented - # initialization requires at least one argument (apart from self) - with pytest.raises(TypeError) as excinfo: - TestCheck() + # execute_module required at init if it will be used + with pytest.raises(RuntimeError) as excinfo: + TestCheck().execute_module("foo") assert 'execute_module' in str(excinfo.value) - assert 'module_executor' in str(excinfo.value) execute_module = object() # initialize with positional argument check = TestCheck(execute_module) - # new recommended name - assert check.execute_module == execute_module - # deprecated attribute name - assert check.module_executor == execute_module + assert check._execute_module == execute_module - # initialize with keyword argument, recommended name + # initialize with keyword argument check = TestCheck(execute_module=execute_module) - # new recommended name - assert check.execute_module == execute_module - # deprecated attribute name - assert check.module_executor == execute_module + assert check._execute_module == execute_module - # initialize with keyword argument, deprecated name - check = TestCheck(module_executor=execute_module) - # new recommended name - assert check.execute_module == execute_module - # deprecated attribute name - assert check.module_executor == execute_module + assert check.task_vars == {} + assert check.tmp is None def test_subclasses(): @@ -81,19 +70,27 @@ def test_load_checks(): assert modules +def dummy_check(task_vars): + class TestCheck(OpenShiftCheck): + name = "dummy" + run = NotImplemented + + return TestCheck(task_vars=task_vars) + + @pytest.mark.parametrize("keys,expected", [ (("foo",), 42), (("bar", "baz"), "openshift"), ]) def test_get_var_ok(task_vars, keys, expected): - assert get_var(task_vars, *keys) == expected + assert dummy_check(task_vars).get_var(*keys) == expected def test_get_var_error(task_vars, missing_keys): with pytest.raises(OpenShiftCheckException): - get_var(task_vars, *missing_keys) + dummy_check(task_vars).get_var(*missing_keys) def test_get_var_default(task_vars, missing_keys): default = object() - assert get_var(task_vars, *missing_keys, default=default) == default + assert dummy_check(task_vars).get_var(*missing_keys, default=default) == default diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py index 6494e1c06..b6acef5a6 100644 --- a/roles/openshift_health_checker/test/ovs_version_test.py +++ b/roles/openshift_health_checker/test/ovs_version_test.py @@ -4,7 +4,7 @@ from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException def test_openshift_version_not_supported(): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(*_): return {} openshift_release = '111.7.0' @@ -16,15 +16,14 @@ def test_openshift_version_not_supported(): openshift_deployment_type='origin', ) - check = OvsVersion(execute_module=execute_module) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + OvsVersion(execute_module, task_vars).run() assert "no recommended version of Open vSwitch" in str(excinfo.value) def test_invalid_openshift_release_format(): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(*_): return {} task_vars = dict( @@ -33,9 +32,8 @@ def test_invalid_openshift_release_format(): openshift_deployment_type='origin', ) - check = OvsVersion(execute_module=execute_module) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + OvsVersion(execute_module, task_vars).run() assert "invalid version" in str(excinfo.value) @@ -54,7 +52,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version): ) return_value = object() - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, module_args=None, *_): assert module_name == 'rpm_version' assert "package_list" in module_args @@ -64,8 +62,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version): return return_value - check = OvsVersion(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = OvsVersion(execute_module, task_vars).run() assert result is return_value @@ -86,4 +83,4 @@ def test_ovs_version_skip_when_not_master_nor_node(group_names, is_containerized group_names=group_names, openshift=dict(common=dict(is_containerized=is_containerized)), ) - assert OvsVersion.is_active(task_vars=task_vars) == is_active + assert OvsVersion(None, task_vars).is_active() == is_active diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py index f7e916a46..1fe648b75 100644 --- a/roles/openshift_health_checker/test/package_availability_test.py +++ b/roles/openshift_health_checker/test/package_availability_test.py @@ -14,7 +14,7 @@ def test_is_active(pkg_mgr, is_containerized, is_active): ansible_pkg_mgr=pkg_mgr, openshift=dict(common=dict(is_containerized=is_containerized)), ) - assert PackageAvailability.is_active(task_vars=task_vars) == is_active + assert PackageAvailability(None, task_vars).is_active() == is_active @pytest.mark.parametrize('task_vars,must_have_packages,must_not_have_packages', [ @@ -51,13 +51,12 @@ def test_is_active(pkg_mgr, is_containerized, is_active): def test_package_availability(task_vars, must_have_packages, must_not_have_packages): return_value = object() - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, module_args=None, *_): assert module_name == 'check_yum_update' assert 'packages' in module_args assert set(module_args['packages']).issuperset(must_have_packages) assert not set(module_args['packages']).intersection(must_not_have_packages) return return_value - check = PackageAvailability(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) + result = PackageAvailability(execute_module, task_vars).run() assert result is return_value diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py index 5e000cff5..06489b0d7 100644 --- a/roles/openshift_health_checker/test/package_update_test.py +++ b/roles/openshift_health_checker/test/package_update_test.py @@ -4,13 +4,12 @@ from openshift_checks.package_update import PackageUpdate def test_package_update(): return_value = object() - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, module_args=None, *_): assert module_name == 'check_yum_update' assert 'packages' in module_args # empty list of packages means "generic check if 'yum update' will work" assert module_args['packages'] == [] return return_value - check = PackageUpdate(execute_module=execute_module) - result = check.run(tmp=None, task_vars=None) + result = PackageUpdate(execute_module).run() assert result is return_value diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index 1bb6371ae..1ddb9cecb 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -8,7 +8,7 @@ from openshift_checks.package_version import PackageVersion, OpenShiftCheckExcep ('0.0.0', ["no recommended version of Docker"]), ]) def test_openshift_version_not_supported(openshift_release, extra_words): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(*_): return {} task_vars = dict( @@ -18,16 +18,16 @@ def test_openshift_version_not_supported(openshift_release, extra_words): openshift_deployment_type='origin', ) - check = PackageVersion(execute_module=execute_module) + check = PackageVersion(execute_module, task_vars) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + check.run() for word in extra_words: assert word in str(excinfo.value) def test_invalid_openshift_release_format(): - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(*_): return {} task_vars = dict( @@ -36,9 +36,9 @@ def test_invalid_openshift_release_format(): openshift_deployment_type='origin', ) - check = PackageVersion(execute_module=execute_module) + check = PackageVersion(execute_module, task_vars) with pytest.raises(OpenShiftCheckException) as excinfo: - check.run(tmp=None, task_vars=task_vars) + check.run() assert "invalid version" in str(excinfo.value) @@ -57,7 +57,7 @@ def test_package_version(openshift_release): ) return_value = object() - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_): assert module_name == 'aos_version' assert "package_list" in module_args @@ -67,8 +67,8 @@ def test_package_version(openshift_release): return return_value - check = PackageVersion(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) + check = PackageVersion(execute_module, task_vars) + result = check.run() assert result is return_value @@ -89,7 +89,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc ) return_value = object() - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): + def execute_module(module_name=None, module_args=None, *_): assert module_name == 'aos_version' assert "package_list" in module_args @@ -99,8 +99,8 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc return return_value - check = PackageVersion(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) + check = PackageVersion(execute_module, task_vars) + result = check.run() assert result is return_value @@ -121,4 +121,4 @@ def test_package_version_skip_when_not_master_nor_node(group_names, is_container group_names=group_names, openshift=dict(common=dict(is_containerized=is_containerized)), ) - assert PackageVersion.is_active(task_vars=task_vars) == is_active + assert PackageVersion(None, task_vars).is_active() == is_active diff --git a/roles/openshift_hosted/defaults/main.yml b/roles/openshift_hosted/defaults/main.yml index 089054e2f..0391e5602 100644 --- a/roles/openshift_hosted/defaults/main.yml +++ b/roles/openshift_hosted/defaults/main.yml @@ -29,7 +29,7 @@ openshift_hosted_routers: openshift_hosted_router_certificate: {} openshift_hosted_registry_cert_expire_days: 730 -openshift_hosted_router_create_certificate: False +openshift_hosted_router_create_certificate: True os_firewall_allow: - service: Docker Registry Port diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index c60b67862..dd485a64a 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -23,8 +23,8 @@ signer_key: "{{ openshift_master_config_dir }}/ca.key" signer_serial: "{{ openshift_master_config_dir }}/ca.serial.txt" hostnames: - - "{{ openshift_master_default_subdomain }}" - - "*.{{ openshift_master_default_subdomain }}" + - "{{ openshift_master_default_subdomain | default('router.default.svc.cluster.local') }}" + - "*.{{ openshift_master_default_subdomain | default('router.default.svc.cluster.local') }}" cert: "{{ ('/etc/origin/master/' ~ (item.certificate.certfile | basename)) if 'certfile' in item.certificate else ((openshift_master_config_dir) ~ '/openshift-router.crt') }}" key: "{{ ('/etc/origin/master/' ~ (item.certificate.keyfile | basename)) if 'keyfile' in item.certificate else ((openshift_master_config_dir) ~ '/openshift-router.key') }}" with_items: "{{ openshift_hosted_routers }}" @@ -37,7 +37,7 @@ cafile: "{{ openshift_master_config_dir ~ '/ca.crt' }}" # End Block - when: openshift_hosted_router_create_certificate | bool + when: ( openshift_hosted_router_create_certificate | bool ) and openshift_hosted_router_certificate == {} - name: Get the certificate contents for router copy: diff --git a/roles/openshift_loadbalancer/README.md b/roles/openshift_loadbalancer/README.md index bea4c509b..330895f20 100644 --- a/roles/openshift_loadbalancer/README.md +++ b/roles/openshift_loadbalancer/README.md @@ -25,6 +25,7 @@ From this role: | openshift_loadbalancer_default_maxconn | 20000 | Maximum per-process number of concurrent connections. | | openshift_loadbalancer_frontends | none | List of frontends. See example below. | | openshift_loadbalancer_backends | none | List of backends. See example below. | +| openshift_image_tag | none | Image tag for containerized haproxy image. | Dependencies ------------ @@ -64,6 +65,7 @@ Example Playbook - name: master3 address: "192.168.122.223:8443" opts: check + openshift_image_tag: v3.6.153 ``` License diff --git a/roles/openshift_logging_kibana/defaults/main.yml b/roles/openshift_logging_kibana/defaults/main.yml index 23337bcd2..b2556fd71 100644 --- a/roles/openshift_logging_kibana/defaults/main.yml +++ b/roles/openshift_logging_kibana/defaults/main.yml @@ -11,7 +11,7 @@ openshift_logging_kibana_nodeselector: "" openshift_logging_kibana_cpu_limit: null openshift_logging_kibana_memory_limit: 736Mi -openshift_logging_kibana_hostname: "kibana.router.default.svc.cluster.local" +openshift_logging_kibana_hostname: "{{ openshift_hosted_logging_hostname | default('kibana.' ~ (openshift_master_default_subdomain | default('router.default.svc.cluster.local', true))) }}" openshift_logging_kibana_es_host: "logging-es" openshift_logging_kibana_es_port: 9200 diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 9b7125240..0c4ee319c 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -140,6 +140,12 @@ - set_fact: openshift_push_via_dns: "{{ (openshift_use_dnsmasq | default(true) and openshift.common.version_gte_3_6) or (already_set.stdout | match('OPENSHIFT_DEFAULT_REGISTRY=docker-registry.default.svc:5000')) }}" +- name: Set fact of all etcd host IPs + openshift_facts: + role: common + local_facts: + no_proxy_etcd_host_ips: "{{ openshift_no_proxy_etcd_host_ips }}" + - name: Install the systemd units include: systemd_units.yml @@ -200,6 +206,10 @@ delay: 60 notify: Verify API Server +- name: Dump logs from master service if it failed + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master + when: start_result | failed + - name: Stop and disable non-HA master when running HA systemd: name: "{{ openshift.common.service_type }}-master" @@ -233,6 +243,10 @@ retries: 1 delay: 60 +- name: Dump logs from master-api if it failed + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master-api + when: start_result | failed + - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] @@ -252,6 +266,10 @@ retries: 1 delay: 60 +- name: Dump logs from master-api if it failed + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master-api + when: start_result | failed + - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] @@ -288,6 +306,10 @@ retries: 1 delay: 60 +- name: Dump logs from master-controllers if it failed + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master-controllers + when: start_result | failed + - name: Wait for master controller service to start on first master pause: seconds: 15 @@ -304,6 +326,10 @@ retries: 1 delay: 60 +- name: Dump logs from master-controllers if it failed + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master-controllers + when: start_result | failed + - set_fact: master_controllers_service_status_changed: "{{ start_result | changed }}" when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index 8d7ee00ed..31129a6ac 100644 --- a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -26,7 +26,6 @@ - name: generate htpasswd file for hawkular metrics local_action: htpasswd path="{{ local_tmp.stdout }}/hawkular-metrics.htpasswd" name=hawkular password="{{ hawkular_metrics_pwd.content | b64decode }}" - no_log: true become: false - name: copy local generated passwords to target diff --git a/roles/openshift_metrics/tasks/generate_rolebindings.yaml b/roles/openshift_metrics/tasks/generate_rolebindings.yaml index e050c8eb2..1304ab8b5 100644 --- a/roles/openshift_metrics/tasks/generate_rolebindings.yaml +++ b/roles/openshift_metrics/tasks/generate_rolebindings.yaml @@ -13,3 +13,27 @@ - kind: ServiceAccount name: hawkular changed_when: no + +- name: generate hawkular-metrics cluster role binding for the hawkular service account + template: + src: rolebinding.j2 + dest: "{{ mktemp.stdout }}/templates/hawkular-cluster-rolebinding.yaml" + vars: + cluster: True + obj_name: hawkular-namespace-watcher + labels: + metrics-infra: hawkular + roleRef: + kind: ClusterRole + name: hawkular-metrics + subjects: + - kind: ServiceAccount + name: hawkular + namespace: "{{openshift_metrics_project}}" + changed_when: no + +- name: generate the hawkular cluster role + template: + src: hawkular_metrics_role.j2 + dest: "{{ mktemp.stdout }}/templates/hawkular-cluster-role.yaml" + changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_metrics.yaml b/roles/openshift_metrics/tasks/uninstall_metrics.yaml index 9a5d52eb6..403b1252c 100644 --- a/roles/openshift_metrics/tasks/uninstall_metrics.yaml +++ b/roles/openshift_metrics/tasks/uninstall_metrics.yaml @@ -6,7 +6,7 @@ command: > {{ openshift.common.client_binary }} -n {{ openshift_metrics_project }} --config={{ mktemp.stdout }}/admin.kubeconfig delete --ignore-not-found --selector=metrics-infra - all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings + all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings,clusterrole register: delete_metrics changed_when: delete_metrics.stdout != 'No resources found' @@ -16,4 +16,5 @@ delete --ignore-not-found rolebinding/hawkular-view clusterrolebinding/heapster-cluster-reader + clusterrolebinding/hawkular-metrics changed_when: delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_metrics_role.j2 b/roles/openshift_metrics/templates/hawkular_metrics_role.j2 new file mode 100644 index 000000000..6c9dbf5d6 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_metrics_role.j2 @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ClusterRole +metadata: + name: hawkular-metrics + labels: + metrics-infra: hawkular-metrics +rules: +- apiGroups: + - "" + resources: + - namespaces + verbs: + - list + - get + - watch diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 879f6c207..0133533fc 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -230,7 +230,7 @@ ignore_errors: true - name: Dump logs from node service if it failed - command: journalctl --no-pager -n 100 {{ openshift.common.service_type }}-node + command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-node when: node_start_result | failed - name: Abort if node failed to start diff --git a/roles/openshift_node/templates/node.service.j2 b/roles/openshift_node/templates/node.service.j2 index 1dbe58439..e12a52c15 100644 --- a/roles/openshift_node/templates/node.service.j2 +++ b/roles/openshift_node/templates/node.service.j2 @@ -24,8 +24,8 @@ WorkingDirectory=/var/lib/origin/ SyslogIdentifier={{ openshift.common.service_type }}-node Restart=always RestartSec=5s +TimeoutStartSec=300 OOMScoreAdjust=-999 -KillMode=process [Install] WantedBy=multi-user.target diff --git a/roles/openshift_node_upgrade/templates/node.service.j2 b/roles/openshift_node_upgrade/templates/node.service.j2 index 1dbe58439..e12a52c15 100644 --- a/roles/openshift_node_upgrade/templates/node.service.j2 +++ b/roles/openshift_node_upgrade/templates/node.service.j2 @@ -24,8 +24,8 @@ WorkingDirectory=/var/lib/origin/ SyslogIdentifier={{ openshift.common.service_type }}-node Restart=always RestartSec=5s +TimeoutStartSec=300 OOMScoreAdjust=-999 -KillMode=process [Install] WantedBy=multi-user.target diff --git a/roles/openshift_repos/defaults/main.yaml b/roles/openshift_repos/defaults/main.yaml index 7c5a14cd7..44f34ea7b 100644 --- a/roles/openshift_repos/defaults/main.yaml +++ b/roles/openshift_repos/defaults/main.yaml @@ -1,2 +1,3 @@ --- openshift_additional_repos: {} +openshift_repos_enable_testing: false diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 8f8550e2d..7458db87e 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -33,7 +33,7 @@ # "centos-release-openshift-origin" package which configures the repository. # This task matches the file names provided by the package so that they are # not installed twice in different files and remains idempotent. - - name: Configure origin gpg keys if needed + - name: Configure origin repositories and gpg keys if needed copy: src: "{{ item.src }}" dest: "{{ item.dest }}" @@ -49,6 +49,10 @@ - openshift_deployment_type == 'origin' - openshift_enable_origin_repo | default(true) | bool + - name: Enable centos-openshift-origin-testing repository + command: yum-config-manager --enable centos-openshift-origin-testing + when: openshift_repos_enable_testing | bool + - name: Ensure clean repo cache in the event repos have been changed manually debug: msg: "First run of openshift_repos" diff --git a/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml b/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml index c30c15778..71e21a269 100644 --- a/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml +++ b/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml @@ -99,7 +99,6 @@ objects: - "" resources: - secrets - - podpresets verbs: - create - update @@ -149,6 +148,11 @@ objects: - podpresets verbs: - create + - update + - delete + - get + - list + - watch - kind: ClusterRoleBinding apiVersion: v1 diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml index b54a8e36c..7a2987883 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml @@ -2,29 +2,29 @@ - set_fact: glusterfs_timeout: "{{ openshift_storage_glusterfs_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_namespace }}" - glusterfs_is_native: "{{ openshift_storage_glusterfs_is_native }}" + glusterfs_is_native: "{{ openshift_storage_glusterfs_is_native | bool }}" glusterfs_name: "{{ openshift_storage_glusterfs_name }}" glusterfs_nodeselector: "{{ openshift_storage_glusterfs_nodeselector | default(['storagenode', openshift_storage_glusterfs_name] | join('=')) | map_from_pairs }}" glusterfs_storageclass: "{{ openshift_storage_glusterfs_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_version }}" - glusterfs_wipe: "{{ openshift_storage_glusterfs_wipe }}" - glusterfs_heketi_is_native: "{{ openshift_storage_glusterfs_heketi_is_native }}" - glusterfs_heketi_is_missing: "{{ openshift_storage_glusterfs_heketi_is_missing }}" - glusterfs_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_heketi_deploy_is_missing }}" + glusterfs_wipe: "{{ openshift_storage_glusterfs_wipe | bool }}" + glusterfs_heketi_is_native: "{{ openshift_storage_glusterfs_heketi_is_native | bool }}" + glusterfs_heketi_is_missing: "{{ openshift_storage_glusterfs_heketi_is_missing | bool }}" + glusterfs_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_heketi_deploy_is_missing | bool }}" glusterfs_heketi_cli: "{{ openshift_storage_glusterfs_heketi_cli }}" glusterfs_heketi_image: "{{ openshift_storage_glusterfs_heketi_image }}" glusterfs_heketi_version: "{{ openshift_storage_glusterfs_heketi_version }}" glusterfs_heketi_admin_key: "{{ openshift_storage_glusterfs_heketi_admin_key }}" glusterfs_heketi_user_key: "{{ openshift_storage_glusterfs_heketi_user_key }}" - glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load }}" - glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe }}" + glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load | bool }}" + glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe | bool }}" glusterfs_heketi_url: "{{ openshift_storage_glusterfs_heketi_url }}" glusterfs_heketi_port: "{{ openshift_storage_glusterfs_heketi_port }}" glusterfs_heketi_executor: "{{ openshift_storage_glusterfs_heketi_executor }}" glusterfs_heketi_ssh_port: "{{ openshift_storage_glusterfs_heketi_ssh_port }}" glusterfs_heketi_ssh_user: "{{ openshift_storage_glusterfs_heketi_ssh_user }}" - glusterfs_heketi_ssh_sudo: "{{ openshift_storage_glusterfs_heketi_ssh_sudo }}" + glusterfs_heketi_ssh_sudo: "{{ openshift_storage_glusterfs_heketi_ssh_sudo | bool }}" glusterfs_heketi_ssh_keyfile: "{{ openshift_storage_glusterfs_heketi_ssh_keyfile }}" glusterfs_nodes: "{{ groups.glusterfs }}" diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml index 0b4d1c82b..e46cec378 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml @@ -2,29 +2,29 @@ - set_fact: glusterfs_timeout: "{{ openshift_storage_glusterfs_registry_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_registry_namespace }}" - glusterfs_is_native: "{{ openshift_storage_glusterfs_registry_is_native }}" + glusterfs_is_native: "{{ openshift_storage_glusterfs_registry_is_native | bool }}" glusterfs_name: "{{ openshift_storage_glusterfs_registry_name }}" glusterfs_nodeselector: "{{ openshift_storage_glusterfs_registry_nodeselector | default(['storagenode', openshift_storage_glusterfs_registry_name] | join('=')) | map_from_pairs }}" glusterfs_storageclass: "{{ openshift_storage_glusterfs_registry_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_registry_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_registry_version }}" - glusterfs_wipe: "{{ openshift_storage_glusterfs_registry_wipe }}" - glusterfs_heketi_is_native: "{{ openshift_storage_glusterfs_registry_heketi_is_native }}" - glusterfs_heketi_is_missing: "{{ openshift_storage_glusterfs_registry_heketi_is_missing }}" - glusterfs_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_registry_heketi_deploy_is_missing }}" + glusterfs_wipe: "{{ openshift_storage_glusterfs_registry_wipe | bool }}" + glusterfs_heketi_is_native: "{{ openshift_storage_glusterfs_registry_heketi_is_native | bool }}" + glusterfs_heketi_is_missing: "{{ openshift_storage_glusterfs_registry_heketi_is_missing | bool }}" + glusterfs_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_registry_heketi_deploy_is_missing | bool }}" glusterfs_heketi_cli: "{{ openshift_storage_glusterfs_registry_heketi_cli }}" glusterfs_heketi_image: "{{ openshift_storage_glusterfs_registry_heketi_image }}" glusterfs_heketi_version: "{{ openshift_storage_glusterfs_registry_heketi_version }}" glusterfs_heketi_admin_key: "{{ openshift_storage_glusterfs_registry_heketi_admin_key }}" glusterfs_heketi_user_key: "{{ openshift_storage_glusterfs_registry_heketi_user_key }}" - glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_registry_heketi_topology_load }}" - glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_registry_heketi_wipe }}" + glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_registry_heketi_topology_load | bool }}" + glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_registry_heketi_wipe | bool }}" glusterfs_heketi_url: "{{ openshift_storage_glusterfs_registry_heketi_url }}" glusterfs_heketi_port: "{{ openshift_storage_glusterfs_registry_heketi_port }}" glusterfs_heketi_executor: "{{ openshift_storage_glusterfs_registry_heketi_executor }}" glusterfs_heketi_ssh_port: "{{ openshift_storage_glusterfs_registry_heketi_ssh_port }}" glusterfs_heketi_ssh_user: "{{ openshift_storage_glusterfs_registry_heketi_ssh_user }}" - glusterfs_heketi_ssh_sudo: "{{ openshift_storage_glusterfs_registry_heketi_ssh_sudo }}" + glusterfs_heketi_ssh_sudo: "{{ openshift_storage_glusterfs_registry_heketi_ssh_sudo | bool }}" glusterfs_heketi_ssh_keyfile: "{{ openshift_storage_glusterfs_registry_heketi_ssh_keyfile }}" glusterfs_nodes: "{{ groups.glusterfs_registry | default(groups.glusterfs) }}" @@ -56,7 +56,7 @@ - name: Create GlusterFS registry endpoints oc_obj: - namespace: "{{ glusterfs_namespace }}" + namespace: "{{ openshift.hosted.registry.namespace | default('default') }}" state: present kind: endpoints name: "glusterfs-{{ glusterfs_name }}-endpoints" @@ -65,7 +65,7 @@ - name: Create GlusterFS registry service oc_obj: - namespace: "{{ glusterfs_namespace }}" + namespace: "{{ openshift.hosted.registry.namespace | default('default') }}" state: present kind: service name: "glusterfs-{{ glusterfs_name }}-endpoints" |