Diffstat (limited to 'roles')
-rw-r--r--  roles/calico/defaults/main.yaml | 5
-rw-r--r--  roles/calico/tasks/gen_certs.yml | 17
-rw-r--r--  roles/calico/tasks/main.yml | 39
-rw-r--r--  roles/calico/templates/10-calico.conf.j2 | 2
-rw-r--r--  roles/calico/templates/calico.service.j2 | 4
-rw-r--r--  roles/calico/templates/calicoctl.cfg.j2 | 2
-rw-r--r--  roles/calico_master/templates/calico-policy-controller.yml.j2 | 6
-rw-r--r--  roles/docker/README.md | 2
-rw-r--r--  roles/docker/tasks/package_docker.yml | 15
-rw-r--r--  roles/docker/tasks/systemcontainer_docker.yml | 10
-rw-r--r--  roles/openshift_ca/tasks/main.yml | 32
-rw-r--r--  roles/openshift_ca/vars/main.yml | 3
-rw-r--r--  roles/openshift_docker_facts/tasks/main.yml | 3
-rw-r--r--  roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-centos7.json | 28
-rw-r--r--  roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-rhel7.json | 20
-rw-r--r--  roles/openshift_excluder/tasks/disable.yml | 6
-rwxr-xr-x  roles/openshift_facts/library/openshift_facts.py | 12
-rwxr-xr-x  roles/openshift_health_checker/library/aos_version.py | 105
-rw-r--r--  roles/openshift_health_checker/library/ocutil.py | 74
-rw-r--r--  roles/openshift_health_checker/library/rpm_version.py | 127
-rw-r--r--  roles/openshift_health_checker/openshift_checks/__init__.py | 22
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/__init__.py | 0
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/curator.py | 61
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py | 217
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/fluentd.py | 170
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/kibana.py | 229
-rw-r--r--  roles/openshift_health_checker/openshift_checks/logging/logging.py | 96
-rw-r--r--  roles/openshift_health_checker/openshift_checks/ovs_version.py | 78
-rw-r--r--  roles/openshift_health_checker/openshift_checks/package_version.py | 108
-rw-r--r--  roles/openshift_health_checker/test/aos_version_test.py | 87
-rw-r--r--  roles/openshift_health_checker/test/curator_test.py | 68
-rw-r--r--  roles/openshift_health_checker/test/elasticsearch_test.py | 180
-rw-r--r--  roles/openshift_health_checker/test/fluentd_test.py | 109
-rw-r--r--  roles/openshift_health_checker/test/kibana_test.py | 218
-rw-r--r--  roles/openshift_health_checker/test/logging_check_test.py | 137
-rw-r--r--  roles/openshift_health_checker/test/ovs_version_test.py | 89
-rw-r--r--  roles/openshift_health_checker/test/package_version_test.py | 126
-rw-r--r--  roles/openshift_health_checker/test/rpm_version_test.py | 82
-rw-r--r--  roles/openshift_logging/README.md | 31
-rw-r--r--  roles/openshift_logging/defaults/main.yml | 9
-rw-r--r--  roles/openshift_logging/tasks/delete_logging.yaml | 3
-rw-r--r--  roles/openshift_logging/tasks/generate_certs.yaml | 2
-rw-r--r--  roles/openshift_logging/tasks/install_logging.yaml | 7
-rw-r--r--  roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 | 1
-rw-r--r--  roles/openshift_logging_kibana/tasks/main.yaml | 4
-rw-r--r--  roles/openshift_logging_mux/defaults/main.yml | 11
-rw-r--r--  roles/openshift_logging_mux/tasks/main.yaml | 22
-rw-r--r--  roles/openshift_logging_mux/templates/mux.j2 | 2
-rw-r--r--  roles/openshift_master/tasks/main.yml | 1
-rw-r--r--  roles/openshift_master_certificates/tasks/main.yml | 6
-rw-r--r--  roles/openshift_node/tasks/main.yml | 1
-rw-r--r--  roles/openshift_node_upgrade/tasks/restart.yml | 8
-rw-r--r--  roles/openshift_node_upgrade/tasks/rpm_upgrade.yml | 15
l---------  roles/openshift_node_upgrade/templates/atomic-openshift-node.service.j2 | 1
l---------  roles/openshift_node_upgrade/templates/origin-node.service.j2 | 1
55 files changed, 2548 insertions, 166 deletions
diff --git a/roles/calico/defaults/main.yaml b/roles/calico/defaults/main.yaml
index 03c612982..c7eea46f2 100644
--- a/roles/calico/defaults/main.yaml
+++ b/roles/calico/defaults/main.yaml
@@ -1,15 +1,10 @@
---
kubeconfig: "{{openshift.common.config_base}}/node/{{ 'system:node:' + openshift.common.hostname }}.kubeconfig"
-etcd_endpoints: "{{ hostvars[groups.oo_first_master.0].openshift.master.etcd_urls | join(',') }}"
cni_conf_dir: "/etc/cni/net.d/"
cni_bin_dir: "/opt/cni/bin/"
cni_url: "https://github.com/containernetworking/cni/releases/download/v0.4.0/cni-amd64-v0.4.0.tgz"
-calico_etcd_ca_cert_file: "/etc/origin/calico/calico.etcd-ca.crt"
-calico_etcd_cert_file: "/etc/origin/calico/calico.etcd-client.crt"
-calico_etcd_key_file: "/etc/origin/calico/calico.etcd-client.key"
-
calico_url_cni: "https://github.com/projectcalico/cni-plugin/releases/download/v1.5.5/calico"
calico_url_ipam: "https://github.com/projectcalico/cni-plugin/releases/download/v1.5.5/calico-ipam"
diff --git a/roles/calico/tasks/gen_certs.yml b/roles/calico/tasks/gen_certs.yml
new file mode 100644
index 000000000..2e6aa114e
--- /dev/null
+++ b/roles/calico/tasks/gen_certs.yml
@@ -0,0 +1,17 @@
+---
+- name: Calico Node | Generate OpenShift-etcd certs
+ include: ../../../roles/etcd_client_certificates/tasks/main.yml
+ vars:
+ etcd_cert_prefix: calico.etcd-
+ etcd_cert_config_dir: "{{ openshift.common.config_base }}/calico"
+ embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
+ etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}"
+ etcd_cert_subdir: "openshift-calico-{{ openshift.common.hostname }}"
+
+- name: Calico Node | Set etcd cert location facts
+ set_fact:
+ calico_etcd_ca_cert_file: "/etc/origin/calico/calico.etcd-ca.crt"
+ calico_etcd_cert_file: "/etc/origin/calico/calico.etcd-client.crt"
+ calico_etcd_key_file: "/etc/origin/calico/calico.etcd-client.key"
+ calico_etcd_endpoints: "{{ hostvars[groups.oo_first_master.0].openshift.master.etcd_urls | join(',') }}"
+ calico_etcd_cert_dir: "/etc/origin/calico/"
diff --git a/roles/calico/tasks/main.yml b/roles/calico/tasks/main.yml
index fa5e338b3..8a7a61dc9 100644
--- a/roles/calico/tasks/main.yml
+++ b/roles/calico/tasks/main.yml
@@ -1,19 +1,36 @@
---
-- include: ../../../roles/etcd_client_certificates/tasks/main.yml
- vars:
- etcd_cert_prefix: calico.etcd-
- etcd_cert_config_dir: "{{ openshift.common.config_base }}/calico"
- embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
- etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}"
- etcd_cert_subdir: "openshift-calico-{{ openshift.common.hostname }}"
+- name: Calico Node | Error if invalid cert arguments
+ fail:
+ msg: "Must provide all or none for the following etcd params: calico_etcd_cert_dir, calico_etcd_ca_cert_file, calico_etcd_cert_file, calico_etcd_key_file, calico_etcd_endpoints"
+ when: (calico_etcd_cert_dir is defined or calico_etcd_ca_cert_file is defined or calico_etcd_cert_file is defined or calico_etcd_key_file is defined or calico_etcd_endpoints is defined) and not (calico_etcd_cert_dir is defined and calico_etcd_ca_cert_file is defined and calico_etcd_cert_file is defined and calico_etcd_key_file is defined and calico_etcd_endpoints is defined)
-- name: Calico Node | Assure the calico certs have been generated
+- name: Calico Node | Generate certs if not provided
+ include: gen_certs.yml
+ when: item is not defined
+ with_items:
+ - calico_etcd_ca_cert_file
+ - calico_etcd_cert_file
+ - calico_etcd_key_file
+ - calico_etcd_endpoints
+ - calico_etcd_cert_dir
+
+- name: Calico Node | Error if no certs set.
+ fail:
+ msg: "Invalid etcd configuration for calico."
+ when: item is not defined or item == ''
+ with_items:
+ - calico_etcd_ca_cert_file
+ - calico_etcd_cert_file
+ - calico_etcd_key_file
+ - calico_etcd_endpoints
+
+- name: Calico Node | Ensure the calico certs are present
stat:
path: "{{ item }}"
with_items:
- - "{{ calico_etcd_ca_cert_file }}"
- - "{{ calico_etcd_cert_file}}"
- - "{{ calico_etcd_key_file }}"
+ - "{{ calico_etcd_ca_cert_file }}"
+ - "{{ calico_etcd_cert_file}}"
+ - "{{ calico_etcd_key_file }}"
- name: Calico Node | Configure Calico service unit file
template:
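Side note on the guard at the top of this task file: it is an all-or-nothing parity check over five optional inventory variables, written out longhand in the `when:` clause. A minimal Python sketch of the same predicate (the helper itself is illustrative, not part of the role):

    def etcd_params_valid(params):
        """True when the optional Calico etcd settings are all set or all unset."""
        keys = ("calico_etcd_cert_dir", "calico_etcd_ca_cert_file",
                "calico_etcd_cert_file", "calico_etcd_key_file",
                "calico_etcd_endpoints")
        given = [k for k in keys if k in params]
        return len(given) in (0, len(keys))

    assert etcd_params_valid({})  # nothing set: gen_certs.yml fills in the defaults
    assert not etcd_params_valid({"calico_etcd_endpoints": "https://etcd:2379"})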
diff --git a/roles/calico/templates/10-calico.conf.j2 b/roles/calico/templates/10-calico.conf.j2
index 3c8c6b046..1ec569cff 100644
--- a/roles/calico/templates/10-calico.conf.j2
+++ b/roles/calico/templates/10-calico.conf.j2
@@ -4,7 +4,7 @@
"ipam": {
"type": "calico-ipam"
},
- "etcd_endpoints": "{{ etcd_endpoints }}",
+ "etcd_endpoints": "{{ calico_etcd_endpoints }}",
"etcd_key_file": "{{ calico_etcd_key_file }}",
"etcd_cert_file": "{{ calico_etcd_cert_file }}",
"etcd_ca_cert_file": "{{ calico_etcd_ca_cert_file }}",
diff --git a/roles/calico/templates/calico.service.j2 b/roles/calico/templates/calico.service.j2
index 719d7ba0d..302c5f34e 100644
--- a/roles/calico/templates/calico.service.j2
+++ b/roles/calico/templates/calico.service.j2
@@ -13,8 +13,8 @@ ExecStart=/usr/bin/docker run --net=host --privileged \
-e CALICO_IPV4POOL_IPIP={{ calico_ipv4pool_ipip }} \
-e CALICO_IPV4POOL_CIDR={{ calico_ipv4pool_cidr }} \
-e FELIX_IPV6SUPPORT=false \
- -e ETCD_ENDPOINTS={{ etcd_endpoints }} \
- -v /etc/origin/calico:/etc/origin/calico \
+ -e ETCD_ENDPOINTS={{ calico_etcd_endpoints }} \
+ -v {{ calico_etcd_cert_dir }}:{{ calico_etcd_cert_dir }} \
-e ETCD_CA_CERT_FILE={{ calico_etcd_ca_cert_file }} \
-e ETCD_CERT_FILE={{ calico_etcd_cert_file }} \
-e ETCD_KEY_FILE={{ calico_etcd_key_file }} \
diff --git a/roles/calico/templates/calicoctl.cfg.j2 b/roles/calico/templates/calicoctl.cfg.j2
index 722385ed8..a00ea27dc 100644
--- a/roles/calico/templates/calicoctl.cfg.j2
+++ b/roles/calico/templates/calicoctl.cfg.j2
@@ -3,7 +3,7 @@ kind: calicoApiConfig
metadata:
spec:
datastoreType: "etcdv2"
- etcdEndpoints: "{{ etcd_endpoints }}"
+ etcdEndpoints: "{{ calico_etcd_endpoints }}"
etcdKeyFile: "{{ calico_etcd_key_file }}"
etcdCertFile: "{{ calico_etcd_cert_file }}"
etcdCaCertFile: "{{ calico_etcd_ca_cert_file }}"
diff --git a/roles/calico_master/templates/calico-policy-controller.yml.j2 b/roles/calico_master/templates/calico-policy-controller.yml.j2
index 3fb1abf0d..1b87758ce 100644
--- a/roles/calico_master/templates/calico-policy-controller.yml.j2
+++ b/roles/calico_master/templates/calico-policy-controller.yml.j2
@@ -78,7 +78,7 @@ spec:
env:
# The location of the Calico etcd cluster.
- name: ETCD_ENDPOINTS
- value: {{ etcd_endpoints }}
+ value: {{ calico_etcd_endpoints }}
# Location of the CA certificate for etcd.
- name: ETCD_CA_CERT_FILE
value: {{ calico_etcd_ca_cert_file }}
@@ -96,10 +96,10 @@ spec:
volumeMounts:
# Mount in the etcd TLS secrets.
- name: certs
- mountPath: /etc/origin/calico
+ mountPath: {{ calico_etcd_cert_dir }}
volumes:
# Mount in the etcd TLS secrets.
- name: certs
hostPath:
- path: /etc/origin/calico
+ path: {{ calico_etcd_cert_dir }}
diff --git a/roles/docker/README.md b/roles/docker/README.md
index 4a9f21f22..19908c036 100644
--- a/roles/docker/README.md
+++ b/roles/docker/README.md
@@ -3,7 +3,7 @@ Docker
Ensures docker package or system container is installed, and optionally raises timeout for systemd-udevd.service to 5 minutes.
-daemon.json items may be found at https://docs.docker.com/engine/reference/commandline/dockerd/#daemon-configuration-file
+container-daemon.json items may be found at https://docs.docker.com/engine/reference/commandline/dockerd/#daemon-configuration-file
Requirements
------------
diff --git a/roles/docker/tasks/package_docker.yml b/roles/docker/tasks/package_docker.yml
index e101730d2..c82d8659a 100644
--- a/roles/docker/tasks/package_docker.yml
+++ b/roles/docker/tasks/package_docker.yml
@@ -102,6 +102,21 @@
notify:
- restart docker
+- stat: path=/etc/sysconfig/docker-network
+ register: sysconfig_docker_network_check
+
+- name: Configure Docker Network OPTIONS
+ lineinfile:
+ dest: /etc/sysconfig/docker-network
+ regexp: '^DOCKER_NETWORK_OPTIONS=.*$'
+ line: "DOCKER_NETWORK_OPTIONS='\
+ {% if openshift.node is defined and openshift.node.sdn_mtu is defined %} --mtu={{ openshift.node.sdn_mtu }}{% endif %}'"
+ when:
+ - sysconfig_docker_network_check.stat.isreg is defined
+ - sysconfig_docker_network_check.stat.isreg
+ notify:
+ - restart docker
+
- name: Start the Docker service
systemd:
name: docker
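For reference, the line the `lineinfile` task above renders into /etc/sysconfig/docker-network, sketched in Python (the function name is illustrative; note the leading space before --mtu, which matches the Jinja template):

    def render_docker_network_options(sdn_mtu=None):
        """Mirror of the DOCKER_NETWORK_OPTIONS line produced by the task."""
        opts = " --mtu={}".format(sdn_mtu) if sdn_mtu is not None else ""
        return "DOCKER_NETWORK_OPTIONS='{}'".format(opts)

    assert render_docker_network_options(1450) == "DOCKER_NETWORK_OPTIONS=' --mtu=1450'"
    assert render_docker_network_options() == "DOCKER_NETWORK_OPTIONS=''"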
diff --git a/roles/docker/tasks/systemcontainer_docker.yml b/roles/docker/tasks/systemcontainer_docker.yml
index f0f5a40dd..650f06f86 100644
--- a/roles/docker/tasks/systemcontainer_docker.yml
+++ b/roles/docker/tasks/systemcontainer_docker.yml
@@ -130,8 +130,8 @@
dest: "{{ container_engine_systemd_dir }}/custom.conf"
src: systemcontainercustom.conf.j2
-# Set local versions of facts that must be in json format for daemon.json
-# NOTE: When jinja2.9+ is used the daemon.json file can move to using tojson
+# Set local versions of facts that must be in json format for container-daemon.json
+# NOTE: When jinja2.9+ is used the container-daemon.json file can move to using tojson
- set_fact:
l_docker_insecure_registries: "{{ docker_insecure_registries | default([]) | to_json }}"
l_docker_log_options: "{{ docker_log_options | default({}) | to_json }}"
@@ -139,10 +139,12 @@
l_docker_blocked_registries: "{{ docker_blocked_registries | default([]) | to_json }}"
l_docker_selinux_enabled: "{{ docker_selinux_enabled | default(true) | to_json }}"
-# Configure container-engine using the daemon.json file
+# Configure container-engine using the container-daemon.json file
+# NOTE: daemon.json and container-daemon.json have been separated to avoid
+# collisions.
- name: Configure Container Engine
template:
- dest: "{{ docker_conf_dir }}/daemon.json"
+ dest: "{{ docker_conf_dir }}/container-daemon.json"
src: daemon.json
# Enable and start the container-engine service
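Ansible's `to_json` filter is essentially `json.dumps`, so the `l_docker_*` facts above become JSON literals that can be dropped verbatim into container-daemon.json. A quick illustration with sample registry values (not taken from the diff):

    import json

    docker_insecure_registries = ["172.30.0.0/16", "registry.example.com"]
    l_docker_insecure_registries = json.dumps(docker_insecure_registries)
    assert l_docker_insecure_registries == '["172.30.0.0/16", "registry.example.com"]'

    # The default([]) fallback serializes the same way: a valid JSON "[]".
    assert json.dumps([]) == '[]'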
diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml
index c7b906949..b9a7ec32f 100644
--- a/roles/openshift_ca/tasks/main.yml
+++ b/roles/openshift_ca/tasks/main.yml
@@ -108,6 +108,38 @@
delegate_to: "{{ openshift_ca_host }}"
run_once: true
+- name: Test local loopback context
+ command: >
+ {{ hostvars[openshift_ca_host].openshift.common.client_binary }} config view
+ --config={{ openshift_master_loopback_config }}
+ changed_when: false
+ register: loopback_config
+ delegate_to: "{{ openshift_ca_host }}"
+ run_once: true
+
+- name: Generate the loopback master client config
+ command: >
+ {{ hostvars[openshift_ca_host].openshift.common.client_binary }} adm create-api-client-config
+ {% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %}
+ --certificate-authority {{ named_ca_certificate }}
+ {% endfor %}
+ --certificate-authority={{ openshift_ca_cert }}
+ --client-dir={{ openshift_ca_config_dir }}
+ --groups=system:masters,system:openshift-master
+ --master={{ hostvars[openshift_ca_host].openshift.master.loopback_api_url }}
+ --public-master={{ hostvars[openshift_ca_host].openshift.master.loopback_api_url }}
+ --signer-cert={{ openshift_ca_cert }}
+ --signer-key={{ openshift_ca_key }}
+ --signer-serial={{ openshift_ca_serial }}
+ --user=system:openshift-master
+ --basename=openshift-master
+ {% if openshift_version | oo_version_gte_3_5_or_1_5(openshift.common.deployment_type) | bool %}
+ --expire-days={{ openshift_master_cert_expire_days }}
+ {% endif %}
+ when: loopback_context_string not in loopback_config.stdout
+ delegate_to: "{{ openshift_ca_host }}"
+ run_once: true
+
- name: Restore original serviceaccount keys
copy:
src: "{{ item }}.keep"
diff --git a/roles/openshift_ca/vars/main.yml b/roles/openshift_ca/vars/main.yml
index a32e385ec..d04c1766d 100644
--- a/roles/openshift_ca/vars/main.yml
+++ b/roles/openshift_ca/vars/main.yml
@@ -4,3 +4,6 @@ openshift_ca_cert: "{{ openshift_ca_config_dir }}/ca.crt"
openshift_ca_key: "{{ openshift_ca_config_dir }}/ca.key"
openshift_ca_serial: "{{ openshift_ca_config_dir }}/ca.serial.txt"
openshift_version: "{{ openshift_pkg_version | default('') }}"
+
+openshift_master_loopback_config: "{{ openshift_ca_config_dir }}/openshift-master.kubeconfig"
+loopback_context_string: "current-context: {{ openshift.master.loopback_context_name }}"
diff --git a/roles/openshift_docker_facts/tasks/main.yml b/roles/openshift_docker_facts/tasks/main.yml
index 350512452..95e94171d 100644
--- a/roles/openshift_docker_facts/tasks/main.yml
+++ b/roles/openshift_docker_facts/tasks/main.yml
@@ -17,6 +17,9 @@
hosted_registry_insecure: "{{ openshift_docker_hosted_registry_insecure | default(openshift.docker.hosted_registry_insecure | default(False)) }}"
hosted_registry_network: "{{ openshift_docker_hosted_registry_network | default(None) }}"
use_system_container: "{{ openshift_docker_use_system_container | default(False) }}"
+ - role: node
+ local_facts:
+ sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}"
- set_fact:
docker_additional_registries: "{{ openshift.docker.additional_registries
diff --git a/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-centos7.json b/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-centos7.json
index a81dbb654..2583018b7 100644
--- a/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-centos7.json
+++ b/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-centos7.json
@@ -103,7 +103,7 @@
},
"from": {
"kind": "ImageStreamTag",
- "name": "4"
+ "name": "6"
}
},
{
@@ -137,6 +137,22 @@
"kind": "DockerImage",
"name": "centos/nodejs-4-centos7:latest"
}
+ },
+ {
+ "name": "6",
+ "annotations": {
+ "openshift.io/display-name": "Node.js 6",
+ "description": "Build and run Node.js 6 applications on CentOS 7. For more information about using this builder image, including OpenShift considerations, see https://github.com/sclorg/s2i-nodejs-container/blob/master/6/README.md.",
+ "iconClass": "icon-nodejs",
+ "tags": "builder,nodejs",
+ "supports":"nodejs:6,nodejs",
+ "version": "6",
+ "sampleRepo": "https://github.com/openshift/nodejs-ex.git"
+ },
+ "from": {
+ "kind": "DockerImage",
+ "name": "centos/nodejs-6-centos7:latest"
+ }
}
]
}
@@ -407,7 +423,7 @@
"iconClass": "icon-wildfly",
"tags": "builder,wildfly,java",
"supports":"jee,java",
- "sampleRepo": "https://github.com/bparees/openshift-jee-sample.git"
+ "sampleRepo": "https://github.com/openshift/openshift-jee-sample.git"
},
"from": {
"kind": "ImageStreamTag",
@@ -423,7 +439,7 @@
"tags": "builder,wildfly,java",
"supports":"wildfly:8.1,jee,java",
"version": "8.1",
- "sampleRepo": "https://github.com/bparees/openshift-jee-sample.git"
+ "sampleRepo": "https://github.com/openshift/openshift-jee-sample.git"
},
"from": {
"kind": "DockerImage",
@@ -439,7 +455,7 @@
"tags": "builder,wildfly,java",
"supports":"wildfly:9.0,jee,java",
"version": "9.0",
- "sampleRepo": "https://github.com/bparees/openshift-jee-sample.git"
+ "sampleRepo": "https://github.com/openshift/openshift-jee-sample.git"
},
"from": {
"kind": "DockerImage",
@@ -455,7 +471,7 @@
"tags": "builder,wildfly,java",
"supports":"wildfly:10.0,jee,java",
"version": "10.0",
- "sampleRepo": "https://github.com/bparees/openshift-jee-sample.git"
+ "sampleRepo": "https://github.com/openshift/openshift-jee-sample.git"
},
"from": {
"kind": "DockerImage",
@@ -471,7 +487,7 @@
"tags": "builder,wildfly,java",
"supports":"wildfly:10.1,jee,java",
"version": "10.1",
- "sampleRepo": "https://github.com/bparees/openshift-jee-sample.git"
+ "sampleRepo": "https://github.com/openshift/openshift-jee-sample.git"
},
"from": {
"kind": "DockerImage",
diff --git a/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-rhel7.json b/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-rhel7.json
index 2ed0efe1e..b65f0a5e3 100644
--- a/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-rhel7.json
+++ b/roles/openshift_examples/files/examples/v3.6/image-streams/image-streams-rhel7.json
@@ -103,7 +103,7 @@
},
"from": {
"kind": "ImageStreamTag",
- "name": "4"
+ "name": "6"
}
},
{
@@ -137,6 +137,22 @@
"kind": "DockerImage",
"name": "registry.access.redhat.com/rhscl/nodejs-4-rhel7:latest"
}
+ },
+ {
+ "name": "6",
+ "annotations": {
+ "openshift.io/display-name": "Node.js 6",
+ "description": "Build and run Node.js 6 applications on RHEL 7. For more information about using this builder image, including OpenShift considerations, see https://github.com/sclorg/s2i-nodejs-container.",
+ "iconClass": "icon-nodejs",
+ "tags": "builder,nodejs",
+ "supports":"nodejs:6,nodejs",
+ "version": "6",
+ "sampleRepo": "https://github.com/openshift/nodejs-ex.git"
+ },
+ "from": {
+ "kind": "DockerImage",
+ "name": "registry.access.redhat.com/rhscl/nodejs-6-rhel7:latest"
+ }
}
]
}
@@ -253,7 +269,7 @@
"tags": "hidden,builder,php",
"supports":"php:5.5,php",
"version": "5.5",
- "sampleRepo": "https://github.com/openshift/cakephp-ex.git"
+ "sampleRepo": "https://github.com/openshift/cakephp-ex.git"
},
"from": {
"kind": "DockerImage",
diff --git a/roles/openshift_excluder/tasks/disable.yml b/roles/openshift_excluder/tasks/disable.yml
index 8d5a08874..5add25b45 100644
--- a/roles/openshift_excluder/tasks/disable.yml
+++ b/roles/openshift_excluder/tasks/disable.yml
@@ -5,10 +5,12 @@
include: verify_upgrade.yml
# unexclude the current openshift/origin-excluder if it is installed so it can be updated
-- name: Disable OpenShift excluder so it can be updated
+- name: Disable excluders before the upgrade to remove older excluding expressions
include: unexclude.yml
vars:
- unexclude_docker_excluder: false
+ # before the docker excluder can be updated, it needs to be disabled
+ # to remove older excluded packages that are no longer excluded
+ unexclude_docker_excluder: "{{ r_openshift_excluder_enable_docker_excluder }}"
unexclude_openshift_excluder: "{{ r_openshift_excluder_enable_openshift_excluder }}"
# Install any excluder that is enabled
diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py
index 514c06500..cfe092a28 100755
--- a/roles/openshift_facts/library/openshift_facts.py
+++ b/roles/openshift_facts/library/openshift_facts.py
@@ -193,8 +193,7 @@ def hostname_valid(hostname):
"""
if (not hostname or
hostname.startswith('localhost') or
- hostname.endswith('localdomain') or
- hostname.endswith('novalocal')):
+ hostname.endswith('localdomain')):
return False
return True
@@ -1041,10 +1040,13 @@ def set_sdn_facts_if_unset(facts, system_facts):
def set_nodename(facts):
""" set nodename """
if 'node' in facts and 'common' in facts:
- if 'cloudprovider' in facts and facts['cloudprovider']['kind'] == 'openstack':
- facts['node']['nodename'] = facts['provider']['metadata']['hostname'].replace('.novalocal', '')
- elif 'cloudprovider' in facts and facts['cloudprovider']['kind'] == 'gce':
+ if 'cloudprovider' in facts and facts['cloudprovider']['kind'] == 'gce':
facts['node']['nodename'] = facts['provider']['metadata']['instance']['hostname'].split('.')[0]
+
+ # TODO: The openstack cloudprovider nodename setting was too opinionated.
+ # It needs to be generalized before it can be enabled again.
+ # elif 'cloudprovider' in facts and facts['cloudprovider']['kind'] == 'openstack':
+ # facts['node']['nodename'] = facts['provider']['metadata']['hostname'].replace('.novalocal', '')
else:
facts['node']['nodename'] = facts['common']['hostname'].lower()
return facts
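With the `novalocal` special case dropped, `hostname_valid` now rejects only empty names, `localhost*`, and `*localdomain`, and OpenStack hosts fall through to the lowercased common hostname. A standalone behaviour sketch of the changed function:

    def hostname_valid(hostname):
        """Post-change behaviour: '.novalocal' suffixes are accepted again."""
        if (not hostname or
                hostname.startswith('localhost') or
                hostname.endswith('localdomain')):
            return False
        return True

    assert hostname_valid('node1.example.novalocal')   # no longer rejected
    assert not hostname_valid('localhost.localdomain')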
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index 4460ec324..4c205e48c 100755
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -16,8 +16,6 @@ of release availability already. Without duplicating all that, we would
like the user to have a helpful error message if we detect things will
not work out right. Note that if openshift_release is not specified in
the inventory, the version comparison checks just pass.
-
-TODO: fail gracefully on non-yum systems (dnf in Fedora)
'''
from ansible.module_utils.basic import AnsibleModule
@@ -26,7 +24,7 @@ IMPORT_EXCEPTION = None
try:
import yum # pylint: disable=import-error
except ImportError as err:
- IMPORT_EXCEPTION = err # in tox test env, yum import fails
+ IMPORT_EXCEPTION = err
class AosVersionException(Exception):
@@ -37,12 +35,10 @@ class AosVersionException(Exception):
def main():
- '''Entrypoint for this Ansible module'''
+ """Entrypoint for this Ansible module"""
module = AnsibleModule(
argument_spec=dict(
- requested_openshift_release=dict(type="str", default=''),
- openshift_deployment_type=dict(required=True),
- rpm_prefix=dict(required=True), # atomic-openshift, origin, ...?
+ package_list=dict(type="list", required=True),
),
supports_check_mode=True
)
@@ -51,32 +47,37 @@ def main():
module.fail_json(msg="aos_version module could not import yum: %s" % IMPORT_EXCEPTION)
# determine the packages we will look for
- rpm_prefix = module.params['rpm_prefix']
- if not rpm_prefix:
- module.fail_json(msg="rpm_prefix must not be empty")
- expected_pkgs = set([
- rpm_prefix,
- rpm_prefix + '-master',
- rpm_prefix + '-node',
- ])
-
- # determine what level of precision the user specified for the openshift version.
- # should look like a version string with possibly many segments e.g. "3.4.1":
- requested_openshift_release = module.params['requested_openshift_release']
+ package_list = module.params['package_list']
+ if not package_list:
+ module.fail_json(msg="package_list must not be empty")
+
+ # generate set with only the names of expected packages
+ expected_pkg_names = [p["name"] for p in package_list]
+
+ # gather packages that require a multi_minor_release check
+ multi_minor_pkgs = [p for p in package_list if p["check_multi"]]
+
+ # generate list of packages with a specified (non-empty) version
+ # should look like a version string with possibly many segments e.g. "3.4.1"
+ versioned_pkgs = [p for p in package_list if p["version"]]
# get the list of packages available and complain if anything is wrong
try:
- pkgs = _retrieve_available_packages(expected_pkgs)
- if requested_openshift_release:
- _check_precise_version_found(pkgs, expected_pkgs, requested_openshift_release)
- _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release)
- if module.params['openshift_deployment_type'] in ['openshift-enterprise']:
- _check_multi_minor_release(pkgs, expected_pkgs)
+ pkgs = _retrieve_available_packages(expected_pkg_names)
+ if versioned_pkgs:
+ _check_precise_version_found(pkgs, _to_dict(versioned_pkgs))
+ _check_higher_version_found(pkgs, _to_dict(versioned_pkgs))
+ if multi_minor_pkgs:
+ _check_multi_minor_release(pkgs, _to_dict(multi_minor_pkgs))
except AosVersionException as excinfo:
module.fail_json(msg=str(excinfo))
module.exit_json(changed=False)
+def _to_dict(pkg_list):
+ return {pkg["name"]: pkg for pkg in pkg_list}
+
+
def _retrieve_available_packages(expected_pkgs):
# search for package versions available for openshift pkgs
yb = yum.YumBase() # pylint: disable=invalid-name
@@ -104,56 +105,60 @@ def _retrieve_available_packages(expected_pkgs):
class PreciseVersionNotFound(AosVersionException):
- '''Exception for reporting packages not available at given release'''
- def __init__(self, requested_release, not_found):
- msg = ['Not all of the required packages are available at requested version %s:' % requested_release]
- msg += [' ' + name for name in not_found]
+ """Exception for reporting packages not available at given version"""
+ def __init__(self, not_found):
+ msg = ['Not all of the required packages are available at their requested version']
+ msg += ['{}:{} '.format(pkg["name"], pkg["version"]) for pkg in not_found]
msg += ['Please check your subscriptions and enabled repositories.']
AosVersionException.__init__(self, '\n'.join(msg), not_found)
-def _check_precise_version_found(pkgs, expected_pkgs, requested_openshift_release):
+def _check_precise_version_found(pkgs, expected_pkgs_dict):
# see if any packages couldn't be found at requested release version
# we would like to verify that the latest available pkgs have however specific a version is given.
# so e.g. if there is a package version 3.4.1.5 the check passes; if only 3.4.0, it fails.
- pkgs_precise_version_found = {}
+ pkgs_precise_version_found = set()
for pkg in pkgs:
- if pkg.name not in expected_pkgs:
+ if pkg.name not in expected_pkgs_dict:
continue
# does the version match, to the precision requested?
# and, is it strictly greater, at the precision requested?
- match_version = '.'.join(pkg.version.split('.')[:requested_openshift_release.count('.') + 1])
- if match_version == requested_openshift_release:
- pkgs_precise_version_found[pkg.name] = True
+ expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
+ match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
+ if match_version == expected_pkg_version:
+ pkgs_precise_version_found.add(pkg.name)
not_found = []
- for name in expected_pkgs:
+ for name, pkg in expected_pkgs_dict.items():
if name not in pkgs_precise_version_found:
- not_found.append(name)
+ not_found.append(pkg)
if not_found:
- raise PreciseVersionNotFound(requested_openshift_release, not_found)
+ raise PreciseVersionNotFound(not_found)
class FoundHigherVersion(AosVersionException):
- '''Exception for reporting that a higher version than requested is available'''
- def __init__(self, requested_release, higher_found):
+ """Exception for reporting that a higher version than requested is available"""
+ def __init__(self, higher_found):
msg = ['Some required package(s) are available at a version',
- 'that is higher than requested %s:' % requested_release]
+ 'that is higher than requested']
msg += [' ' + name for name in higher_found]
msg += ['This will prevent installing the version you requested.']
msg += ['Please check your enabled repositories or adjust openshift_release.']
AosVersionException.__init__(self, '\n'.join(msg), higher_found)
-def _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release):
- req_release_arr = [int(segment) for segment in requested_openshift_release.split(".")]
+def _check_higher_version_found(pkgs, expected_pkgs_dict):
+ expected_pkg_names = list(expected_pkgs_dict)
+
# see if any packages are available in a version higher than requested
higher_version_for_pkg = {}
for pkg in pkgs:
- if pkg.name not in expected_pkgs:
+ if pkg.name not in expected_pkg_names:
continue
+ expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
+ req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")]
version = [int(segment) for segment in pkg.version.split(".")]
too_high = version[:len(req_release_arr)] > req_release_arr
higher_than_seen = version > higher_version_for_pkg.get(pkg.name, [])
@@ -164,11 +169,11 @@ def _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release
higher_found = []
for name, version in higher_version_for_pkg.items():
higher_found.append(name + '-' + '.'.join(str(segment) for segment in version))
- raise FoundHigherVersion(requested_openshift_release, higher_found)
+ raise FoundHigherVersion(higher_found)
class FoundMultiRelease(AosVersionException):
- '''Exception for reporting multiple minor releases found for same package'''
+ """Exception for reporting multiple minor releases found for same package"""
def __init__(self, multi_found):
msg = ['Multiple minor versions of these packages are available']
msg += [' ' + name for name in multi_found]
@@ -176,18 +181,18 @@ class FoundMultiRelease(AosVersionException):
AosVersionException.__init__(self, '\n'.join(msg), multi_found)
-def _check_multi_minor_release(pkgs, expected_pkgs):
+def _check_multi_minor_release(pkgs, expected_pkgs_dict):
# see if any packages are available in more than one minor version
pkgs_by_name_version = {}
for pkg in pkgs:
# keep track of x.y (minor release) versions seen
minor_release = '.'.join(pkg.version.split('.')[:2])
if pkg.name not in pkgs_by_name_version:
- pkgs_by_name_version[pkg.name] = {}
- pkgs_by_name_version[pkg.name][minor_release] = True
+ pkgs_by_name_version[pkg.name] = set()
+ pkgs_by_name_version[pkg.name].add(minor_release)
multi_found = []
- for name in expected_pkgs:
+ for name in expected_pkgs_dict:
if name in pkgs_by_name_version and len(pkgs_by_name_version[name]) > 1:
multi_found.append(name)
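The module's interface changes here from a single rpm_prefix/release pair to a `package_list` of dicts carrying `name`, `version`, and `check_multi` per package. A sketch of the expected payload (sample values) and the `_to_dict` indexing used throughout the module:

    package_list = [  # illustrative; real callers build this from inventory facts
        {"name": "origin", "version": "3.6", "check_multi": False},
        {"name": "origin-master", "version": "3.6", "check_multi": False},
        {"name": "origin-node", "version": "3.6", "check_multi": False},
    ]

    def _to_dict(pkg_list):
        return {pkg["name"]: pkg for pkg in pkg_list}

    assert _to_dict(package_list)["origin-node"]["version"] == "3.6"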
diff --git a/roles/openshift_health_checker/library/ocutil.py b/roles/openshift_health_checker/library/ocutil.py
new file mode 100644
index 000000000..2e60735d6
--- /dev/null
+++ b/roles/openshift_health_checker/library/ocutil.py
@@ -0,0 +1,74 @@
+#!/usr/bin/python
+"""Interface to OpenShift oc command"""
+
+import os
+import shlex
+import shutil
+import subprocess
+
+from ansible.module_utils.basic import AnsibleModule
+
+
+ADDITIONAL_PATH_LOOKUPS = ['/usr/local/bin', os.path.expanduser('~/bin')]
+
+
+def locate_oc_binary():
+ """Find and return oc binary file"""
+ # https://github.com/openshift/openshift-ansible/issues/3410
+ # oc can be in /usr/local/bin in some cases, but that may not
+ # be in $PATH due to ansible/sudo
+ paths = os.environ.get("PATH", os.defpath).split(os.pathsep) + ADDITIONAL_PATH_LOOKUPS
+
+ oc_binary = 'oc'
+
+ # Use shutil.which if it is available, otherwise fallback to a naive path search
+ try:
+ which_result = shutil.which(oc_binary, path=os.pathsep.join(paths))
+ if which_result is not None:
+ oc_binary = which_result
+ except AttributeError:
+ for path in paths:
+ if os.path.exists(os.path.join(path, oc_binary)):
+ oc_binary = os.path.join(path, oc_binary)
+ break
+
+ return oc_binary
+
+
+def main():
+ """Module that executes commands on a remote OpenShift cluster"""
+
+ module = AnsibleModule(
+ argument_spec=dict(
+ namespace=dict(type="str", required=True),
+ config_file=dict(type="str", required=True),
+ cmd=dict(type="str", required=True),
+ extra_args=dict(type="list", default=[]),
+ ),
+ )
+
+ cmd = [
+ locate_oc_binary(),
+ '--config', module.params["config_file"],
+ '-n', module.params["namespace"],
+ ] + shlex.split(module.params["cmd"])
+
+ failed = True
+ try:
+ cmd_result = subprocess.check_output(list(cmd), stderr=subprocess.STDOUT)
+ failed = False
+ except subprocess.CalledProcessError as exc:
+ cmd_result = '[rc {}] {}\n{}'.format(exc.returncode, ' '.join(exc.cmd), exc.output)
+ except OSError as exc:
+ # we get this when 'oc' is not there
+ cmd_result = str(exc)
+
+ module.exit_json(
+ changed=False,
+ failed=failed,
+ result=cmd_result,
+ )
+
+
+if __name__ == '__main__':
+ main()
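End to end, the module resolves the oc binary, prefixes the --config and -n flags, and appends the user command split with shlex. A sketch of the command construction with hypothetical parameter values:

    import shlex

    params = {  # what Ansible would pass as module arguments
        "config_file": "/etc/origin/master/admin.kubeconfig",
        "namespace": "logging",
        "cmd": "get pods -l component=es -o json",
    }
    cmd = ["oc", "--config", params["config_file"], "-n", params["namespace"]]
    cmd += shlex.split(params["cmd"])
    assert cmd[5:] == ["get", "pods", "-l", "component=es", "-o", "json"]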
diff --git a/roles/openshift_health_checker/library/rpm_version.py b/roles/openshift_health_checker/library/rpm_version.py
new file mode 100644
index 000000000..8ea223055
--- /dev/null
+++ b/roles/openshift_health_checker/library/rpm_version.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python
+"""
+Ansible module for rpm-based systems determining existing package version information in a host.
+"""
+
+from ansible.module_utils.basic import AnsibleModule
+
+IMPORT_EXCEPTION = None
+try:
+ import rpm # pylint: disable=import-error
+except ImportError as err:
+ IMPORT_EXCEPTION = err # in tox test env, rpm import fails
+
+
+class RpmVersionException(Exception):
+ """Base exception class for package version problems"""
+ def __init__(self, message, problem_pkgs=None):
+ Exception.__init__(self, message)
+ self.problem_pkgs = problem_pkgs
+
+
+def main():
+ """Entrypoint for this Ansible module"""
+ module = AnsibleModule(
+ argument_spec=dict(
+ package_list=dict(type="list", required=True),
+ ),
+ supports_check_mode=True
+ )
+
+ if IMPORT_EXCEPTION:
+ module.fail_json(msg="rpm_version module could not import rpm: %s" % IMPORT_EXCEPTION)
+
+ # determine the packages we will look for
+ pkg_list = module.params['package_list']
+ if not pkg_list:
+ module.fail_json(msg="package_list must not be empty")
+
+ # get list of packages available and complain if any
+ # of them are missing or if any errors occur
+ try:
+ pkg_versions = _retrieve_expected_pkg_versions(_to_dict(pkg_list))
+ _check_pkg_versions(pkg_versions, _to_dict(pkg_list))
+ except RpmVersionException as excinfo:
+ module.fail_json(msg=str(excinfo))
+ module.exit_json(changed=False)
+
+
+def _to_dict(pkg_list):
+ return {pkg["name"]: pkg for pkg in pkg_list}
+
+
+def _retrieve_expected_pkg_versions(expected_pkgs_dict):
+ """Search for installed packages matching given pkg names
+ and versions. Returns a dictionary: {pkg_name: [versions]}"""
+
+ transaction = rpm.TransactionSet()
+ pkgs = {}
+
+ for pkg_name in expected_pkgs_dict:
+ matched_pkgs = transaction.dbMatch("name", pkg_name)
+ if not matched_pkgs:
+ continue
+
+ for header in matched_pkgs:
+ if header['name'] == pkg_name:
+ if pkg_name not in pkgs:
+ pkgs[pkg_name] = []
+
+ pkgs[pkg_name].append(header['version'])
+
+ return pkgs
+
+
+def _check_pkg_versions(found_pkgs_dict, expected_pkgs_dict):
+ invalid_pkg_versions = {}
+ not_found_pkgs = []
+
+ for pkg_name, pkg in expected_pkgs_dict.items():
+ if not found_pkgs_dict.get(pkg_name):
+ not_found_pkgs.append(pkg_name)
+ continue
+
+ found_versions = [_parse_version(version) for version in found_pkgs_dict[pkg_name]]
+ expected_version = _parse_version(pkg["version"])
+ if expected_version not in found_versions:
+ invalid_pkg_versions[pkg_name] = {
+ "found_versions": found_versions,
+ "required_version": expected_version,
+ }
+
+ if not_found_pkgs:
+ raise RpmVersionException(
+ '\n'.join([
+ "The following packages were not found to be installed: {}".format('\n '.join([
+ "{}".format(pkg)
+ for pkg in not_found_pkgs
+ ]))
+ ]),
+ not_found_pkgs,
+ )
+
+ if invalid_pkg_versions:
+ raise RpmVersionException(
+ '\n '.join([
+ "The following packages were found to be installed with an incorrect version: {}".format('\n'.join([
+ " \n{}\n Required version: {}\n Found versions: {}".format(
+ pkg_name,
+ pkg["required_version"],
+ ', '.join([version for version in pkg["found_versions"]]))
+ for pkg_name, pkg in invalid_pkg_versions.items()
+ ]))
+ ]),
+ invalid_pkg_versions,
+ )
+
+
+def _parse_version(version_str):
+ segs = version_str.split('.')
+ if not segs or len(segs) <= 2:
+ return version_str
+
+ return '.'.join(segs[0:2])
+
+
+if __name__ == '__main__':
+ main()
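Version comparison here is deliberately coarse: `_parse_version` truncates anything longer than two segments to major.minor, so "3.6.1.2" and "3.6.0" both compare as "3.6". For instance:

    def _parse_version(version_str):
        segs = version_str.split('.')
        if not segs or len(segs) <= 2:
            return version_str
        return '.'.join(segs[0:2])

    assert _parse_version('3.6.1.2') == '3.6'
    assert _parse_version('3.6') == '3.6'
    assert _parse_version('3') == '3'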
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index be63d864a..5c9949ced 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -66,16 +66,26 @@ class OpenShiftCheck(object):
LOADER_EXCLUDES = (
"__init__.py",
"mixins.py",
+ "logging.py",
)
-def load_checks():
+def load_checks(path=None, subpkg=""):
"""Dynamically import all check modules for the side effect of registering checks."""
- return [
- import_module(__package__ + "." + name[:-3])
- for name in os.listdir(os.path.dirname(__file__))
- if name.endswith(".py") and name not in LOADER_EXCLUDES
- ]
+ if path is None:
+ path = os.path.dirname(__file__)
+
+ modules = []
+
+ for name in os.listdir(path):
+ if os.path.isdir(os.path.join(path, name)):
+ modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name)
+ continue
+
+ if name.endswith(".py") and name not in LOADER_EXCLUDES:
+ modules.append(import_module(__package__ + subpkg + "." + name[:-3]))
+
+ return modules
def get_var(task_vars, *keys, **kwargs):
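The recursion in the new `load_checks` builds dotted module names from the directory layout: each subdirectory appends to `subpkg`, so openshift_checks/logging/curator.py is imported as `<package>.logging.curator`. A name-building sketch under that assumption (file walking, imports, and LOADER_EXCLUDES omitted):

    def module_names(package, tree, subpkg=""):
        """tree: {name: subtree-or-None}; mirrors how load_checks composes names."""
        names = []
        for name, subtree in sorted(tree.items()):
            if subtree is not None:  # a directory: recurse with an extended subpkg
                names += module_names(package, subtree, subpkg + "." + name)
            elif name.endswith(".py"):
                names.append(package + subpkg + "." + name[:-3])
        return names

    tree = {"curator.py": None, "logging": {"kibana.py": None}}
    assert module_names("openshift_checks", tree) == [
        "openshift_checks.curator", "openshift_checks.logging.kibana"]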
diff --git a/roles/openshift_health_checker/openshift_checks/logging/__init__.py b/roles/openshift_health_checker/openshift_checks/logging/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/__init__.py
diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py
new file mode 100644
index 000000000..c9fc59896
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py
@@ -0,0 +1,61 @@
+"""
+Module for performing checks on a Curator logging deployment
+"""
+
+from openshift_checks import get_var
+from openshift_checks.logging.logging import LoggingCheck
+
+
+class Curator(LoggingCheck):
+ """Module that checks an integrated logging Curator deployment"""
+
+ name = "curator"
+ tags = ["health", "logging"]
+
+ logging_namespace = None
+
+ def run(self, tmp, task_vars):
+ """Check various things and gather errors. Returns: result as hash"""
+
+ self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ curator_pods, error = super(Curator, self).get_pods_for_component(
+ self.module_executor,
+ self.logging_namespace,
+ "curator",
+ task_vars
+ )
+ if error:
+ return {"failed": True, "changed": False, "msg": error}
+ check_error = self.check_curator(curator_pods)
+
+ if check_error:
+ msg = ("The following Curator deployment issue was found:"
+ "\n-------\n"
+ "{}".format(check_error))
+ return {"failed": True, "changed": False, "msg": msg}
+
+ # TODO(lmeyer): run it all again for the ops cluster
+ return {"failed": False, "changed": False, "msg": 'No problems found with Curator deployment.'}
+
+ def check_curator(self, pods):
+ """Check to see if curator is up and working. Returns: error string"""
+ if not pods:
+ return (
+ "There are no Curator pods for the logging stack,\n"
+ "so nothing will prune Elasticsearch indexes.\n"
+ "Is Curator correctly deployed?"
+ )
+
+ not_running = super(Curator, self).not_running_pods(pods)
+ if len(not_running) == len(pods):
+ return (
+ "The Curator pod is not currently in a running state,\n"
+ "so Elasticsearch indexes may increase without bound."
+ )
+ if len(pods) - len(not_running) > 1:
+ return (
+ "There is more than one Curator pod running. This should not normally happen.\n"
+ "Although this doesn't cause any problems, you may want to investigate."
+ )
+
+ return None
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
new file mode 100644
index 000000000..01cb35b81
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -0,0 +1,217 @@
+"""
+Module for performing checks on an Elasticsearch logging deployment
+"""
+
+import json
+import re
+
+from openshift_checks import get_var
+from openshift_checks.logging.logging import LoggingCheck
+
+
+class Elasticsearch(LoggingCheck):
+ """Module that checks an integrated logging Elasticsearch deployment"""
+
+ name = "elasticsearch"
+ tags = ["health", "logging"]
+
+ logging_namespace = None
+
+ def run(self, tmp, task_vars):
+ """Check various things and gather errors. Returns: result as hash"""
+
+ self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ es_pods, error = super(Elasticsearch, self).get_pods_for_component(
+ self.execute_module,
+ self.logging_namespace,
+ "es",
+ task_vars,
+ )
+ if error:
+ return {"failed": True, "changed": False, "msg": error}
+ check_error = self.check_elasticsearch(es_pods, task_vars)
+
+ if check_error:
+ msg = ("The following Elasticsearch deployment issue was found:"
+ "\n-------\n"
+ "{}".format(check_error))
+ return {"failed": True, "changed": False, "msg": msg}
+
+ # TODO(lmeyer): run it all again for the ops cluster
+ return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'}
+
+ def _not_running_elasticsearch_pods(self, es_pods):
+ """Returns: list of running pods, list of errors about non-running pods"""
+ not_running = super(Elasticsearch, self).not_running_pods(es_pods)
+ if not_running:
+ return not_running, [(
+ 'The following Elasticsearch pods are not running:\n'
+ '{pods}'
+ 'These pods will not aggregate logs from their nodes.'
+ ).format(pods=''.join(
+ " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None'))
+ for pod in not_running
+ ))]
+ return not_running, []
+
+ def check_elasticsearch(self, es_pods, task_vars):
+ """Various checks for elasticsearch. Returns: error string"""
+ not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods)
+ running_pods = [pod for pod in es_pods if pod not in not_running_pods]
+ pods_by_name = {
+ pod['metadata']['name']: pod for pod in running_pods
+ # Filter out pods that are not members of a DC
+ if pod['metadata'].get('labels', {}).get('deploymentconfig')
+ }
+ if not pods_by_name:
+ return 'No logging Elasticsearch pods were found. Is logging deployed?'
+ error_msgs += self._check_elasticsearch_masters(pods_by_name, task_vars)
+ error_msgs += self._check_elasticsearch_node_list(pods_by_name, task_vars)
+ error_msgs += self._check_es_cluster_health(pods_by_name, task_vars)
+ error_msgs += self._check_elasticsearch_diskspace(pods_by_name, task_vars)
+ return '\n'.join(error_msgs)
+
+ @staticmethod
+ def _build_es_curl_cmd(pod_name, url):
+ base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'"
+ return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url)
+
+ def _check_elasticsearch_masters(self, pods_by_name, task_vars):
+ """Check that Elasticsearch masters are sane. Returns: list of error strings"""
+ es_master_names = set()
+ error_msgs = []
+ for pod_name in pods_by_name.keys():
+ # Compare what each ES node reports as master and compare for split brain
+ get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master")
+ master_name_str = self._exec_oc(get_master_cmd, [], task_vars)
+ master_names = (master_name_str or '').split(' ')
+ if len(master_names) > 1:
+ es_master_names.add(master_names[1])
+ else:
+ error_msgs.append(
+ 'No master? Elasticsearch {pod} returned an unexpected response when asked for its master name:\n'
+ ' {response}'.format(pod=pod_name, response=master_name_str)
+ )
+
+ if not es_master_names:
+ error_msgs.append('No logging Elasticsearch masters were found. Is logging deployed?')
+ return error_msgs
+
+ if len(es_master_names) > 1:
+ error_msgs.append(
+ 'Found multiple Elasticsearch masters according to the pods:\n'
+ '{master_list}\n'
+ 'This implies that the masters have "split brain" and are not correctly\n'
+ 'replicating data for the logging cluster. Log loss is likely to occur.'
+ .format(master_list='\n'.join(' ' + master for master in es_master_names))
+ )
+
+ return error_msgs
+
+ def _check_elasticsearch_node_list(self, pods_by_name, task_vars):
+ """Check that reported ES masters are accounted for by pods. Returns: list of error strings"""
+
+ if not pods_by_name:
+ return ['No logging Elasticsearch masters were found. Is logging deployed?']
+
+ # get ES cluster nodes
+ node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes')
+ cluster_node_data = self._exec_oc(node_cmd, [], task_vars)
+ try:
+ cluster_nodes = json.loads(cluster_node_data)['nodes']
+ except (ValueError, KeyError):
+ return [
+ 'Failed to query Elasticsearch for the list of ES nodes. The output was:\n' +
+ cluster_node_data
+ ]
+
+ # Try to match all ES-reported node hosts to known pods.
+ error_msgs = []
+ for node in cluster_nodes.values():
+ # Note that with 1.4/3.4 the pod IP may be used as the master name
+ if not any(node['host'] in (pod_name, pod['status'].get('podIP'))
+ for pod_name, pod in pods_by_name.items()):
+ error_msgs.append(
+ 'The Elasticsearch cluster reports a member node "{node}"\n'
+ 'that does not correspond to any known ES pod.'.format(node=node['host'])
+ )
+
+ return error_msgs
+
+ def _check_es_cluster_health(self, pods_by_name, task_vars):
+ """Exec into the elasticsearch pods and check the cluster health. Returns: list of errors"""
+ error_msgs = []
+ for pod_name in pods_by_name.keys():
+ cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true')
+ cluster_health_data = self._exec_oc(cluster_health_cmd, [], task_vars)
+ try:
+ health_res = json.loads(cluster_health_data)
+ if not health_res or not health_res.get('status'):
+ raise ValueError()
+ except ValueError:
+ error_msgs.append(
+ 'Could not retrieve cluster health status from logging ES pod "{pod}".\n'
+ 'Response was:\n{output}'.format(pod=pod_name, output=cluster_health_data)
+ )
+ continue
+
+ if health_res['status'] not in ['green', 'yellow']:
+ error_msgs.append(
+ 'Elasticsearch cluster health status is RED according to pod "{}"'.format(pod_name)
+ )
+
+ return error_msgs
+
+ def _check_elasticsearch_diskspace(self, pods_by_name, task_vars):
+ """
+ Exec into an ES pod and query the diskspace on the persistent volume.
+ Returns: list of errors
+ """
+ error_msgs = []
+ for pod_name in pods_by_name.keys():
+ df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
+ disk_output = self._exec_oc(df_cmd, [], task_vars)
+ lines = disk_output.splitlines()
+ # expecting one header looking like 'IUse% Use%' and one body line
+ body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$'
+ if len(lines) != 2 or len(lines[0].split()) != 2 or not re.match(body_re, lines[1]):
+ error_msgs.append(
+ 'Could not retrieve storage usage from logging ES pod "{pod}".\n'
+ 'Response to `df` command was:\n{output}'.format(pod=pod_name, output=disk_output)
+ )
+ continue
+ inode_pct, disk_pct = re.match(body_re, lines[1]).groups()
+
+ inode_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_inode_pct', default='90')
+ if int(inode_pct) >= int(inode_pct_thresh):
+ error_msgs.append(
+ 'Inode percent usage on the storage volume for logging ES pod "{pod}"\n'
+ ' is {pct}, greater than threshold {limit}.\n'
+ ' Note: threshold can be specified in inventory with {param}'.format(
+ pod=pod_name,
+ pct=str(inode_pct),
+ limit=str(inode_pct_thresh),
+ param='openshift_check_efk_es_inode_pct',
+ ))
+ disk_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_storage_pct', default='80')
+ if int(disk_pct) >= int(disk_pct_thresh):
+ error_msgs.append(
+ 'Disk percent usage on the storage volume for logging ES pod "{pod}"\n'
+ ' is {pct}, greater than threshold {limit}.\n'
+ ' Note: threshold can be specified in inventory with {param}'.format(
+ pod=pod_name,
+ pct=str(disk_pct),
+ limit=str(disk_pct_thresh),
+ param='openshift_check_efk_es_storage_pct',
+ ))
+
+ return error_msgs
+
+ def _exec_oc(self, cmd_str, extra_args, task_vars):
+ return super(Elasticsearch, self).exec_oc(
+ self.execute_module,
+ self.logging_namespace,
+ cmd_str,
+ extra_args,
+ task_vars,
+ )
diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
new file mode 100644
index 000000000..627567293
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
@@ -0,0 +1,170 @@
+"""
+Module for performing checks on a Fluentd logging deployment
+"""
+
+import json
+
+from openshift_checks import get_var
+from openshift_checks.logging.logging import LoggingCheck
+
+
+class Fluentd(LoggingCheck):
+ """Module that checks an integrated logging Fluentd deployment"""
+ name = "fluentd"
+ tags = ["health", "logging"]
+
+ logging_namespace = None
+
+ def run(self, tmp, task_vars):
+ """Check various things and gather errors. Returns: result as hash"""
+
+ self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ fluentd_pods, error = super(Fluentd, self).get_pods_for_component(
+ self.execute_module,
+ self.logging_namespace,
+ "fluentd",
+ task_vars,
+ )
+ if error:
+ return {"failed": True, "changed": False, "msg": error}
+ check_error = self.check_fluentd(fluentd_pods, task_vars)
+
+ if check_error:
+ msg = ("The following Fluentd deployment issue was found:"
+ "\n-------\n"
+ "{}".format(check_error))
+ return {"failed": True, "changed": False, "msg": msg}
+
+ # TODO(lmeyer): run it all again for the ops cluster
+ return {"failed": False, "changed": False, "msg": 'No problems found with Fluentd deployment.'}
+
+ @staticmethod
+ def _filter_fluentd_labeled_nodes(nodes_by_name, node_selector):
+ """Filter to all nodes with fluentd label. Returns dict(name: node), error string"""
+ label, value = node_selector.split('=', 1)
+ fluentd_nodes = {
+ name: node for name, node in nodes_by_name.items()
+ if node['metadata']['labels'].get(label) == value
+ }
+ if not fluentd_nodes:
+ return None, (
+ 'There are no nodes with the fluentd label {label}.\n'
+ 'This means no logs will be aggregated from the nodes.'
+ ).format(label=node_selector)
+ return fluentd_nodes, None
+
+ @staticmethod
+ def _check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars):
+ """Note if nodes are not labeled as expected. Returns: error string"""
+ intended_nodes = get_var(task_vars, 'openshift_logging_fluentd_hosts', default=['--all'])
+ if not intended_nodes or '--all' in intended_nodes:
+ intended_nodes = nodes_by_name.keys()
+ nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys())
+ if nodes_missing_labels:
+ return (
+ 'The following nodes are supposed to be labeled with {label} but are not:\n'
+ ' {nodes}\n'
+ 'Fluentd will not aggregate logs from these nodes.'
+ ).format(label=node_selector, nodes=', '.join(nodes_missing_labels))
+ return None
+
+ @staticmethod
+ def _check_nodes_have_fluentd(pods, fluentd_nodes):
+ """Make sure fluentd is on all the labeled nodes. Returns: error string"""
+ unmatched_nodes = fluentd_nodes.copy()
+ node_names_by_label = {
+ node['metadata']['labels']['kubernetes.io/hostname']: name
+ for name, node in fluentd_nodes.items()
+ }
+ node_names_by_internal_ip = {
+ address['address']: name
+ for name, node in fluentd_nodes.items()
+ for address in node['status']['addresses']
+ if address['type'] == "InternalIP"
+ }
+ for pod in pods:
+ for name in [
+ pod['spec']['nodeName'],
+ node_names_by_internal_ip.get(pod['spec']['nodeName']),
+ node_names_by_label.get(pod.get('spec', {}).get('host')),
+ ]:
+ unmatched_nodes.pop(name, None)
+ if unmatched_nodes:
+ return (
+ 'The following nodes are supposed to have a Fluentd pod but do not:\n'
+ '{nodes}'
+ 'These nodes will not have their logs aggregated.'
+ ).format(nodes=''.join(
+ " {}\n".format(name)
+ for name in unmatched_nodes.keys()
+ ))
+ return None
+
+ def _check_fluentd_pods_running(self, pods):
+ """Make sure all fluentd pods are running. Returns: error string"""
+ not_running = super(Fluentd, self).not_running_pods(pods)
+ if not_running:
+ return (
+ 'The following Fluentd pods are supposed to be running but are not:\n'
+ '{pods}'
+ 'These pods will not aggregate logs from their nodes.'
+ ).format(pods=''.join(
+ " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None'))
+ for pod in not_running
+ ))
+ return None
+
+ def check_fluentd(self, pods, task_vars):
+ """Verify fluentd is running everywhere. Returns: error string"""
+
+ node_selector = get_var(task_vars, 'openshift_logging_fluentd_nodeselector',
+ default='logging-infra-fluentd=true')
+
+ nodes_by_name, error = self.get_nodes_by_name(task_vars)
+
+ if error:
+ return error
+ fluentd_nodes, error = self._filter_fluentd_labeled_nodes(nodes_by_name, node_selector)
+ if error:
+ return error
+
+ error_msgs = []
+ error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars)
+ if error:
+ error_msgs.append(error)
+ error = self._check_nodes_have_fluentd(pods, fluentd_nodes)
+ if error:
+ error_msgs.append(error)
+ error = self._check_fluentd_pods_running(pods)
+ if error:
+ error_msgs.append(error)
+
+ # Make sure there are no extra fluentd pods
+ if len(pods) > len(fluentd_nodes):
+ error_msgs.append(
+ 'There are more Fluentd pods running than nodes labeled.\n'
+ 'This may not cause problems with logging but it likely indicates something wrong.'
+ )
+
+ return '\n'.join(error_msgs)
+
+ def get_nodes_by_name(self, task_vars):
+ """Retrieve all the node definitions. Returns: dict(name: node), error string"""
+ nodes_json = self._exec_oc("get nodes -o json", [], task_vars)
+ try:
+ nodes = json.loads(nodes_json)
+ except ValueError: # no valid json - should not happen
+ return None, "Could not obtain a list of nodes to validate fluentd. Output from oc get:\n" + nodes_json
+ if not nodes or not nodes.get('items'): # also should not happen
+ return None, "No nodes appear to be defined according to the API."
+ return {
+ node['metadata']['name']: node
+ for node in nodes['items']
+ }, None
+
+ def _exec_oc(self, cmd_str, extra_args, task_vars):
+ return super(Fluentd, self).exec_oc(self.execute_module,
+ self.logging_namespace,
+ cmd_str,
+ extra_args,
+ task_vars)
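Node filtering in this check hinges on the selector string: it is split once on "=" and compared against each node's label map, exactly as in `_filter_fluentd_labeled_nodes`. A sketch with a minimal stand-in for the `oc get nodes -o json` items:

    node_selector = 'logging-infra-fluentd=true'   # the role's default selector
    label, value = node_selector.split('=', 1)

    nodes_by_name = {
        "node1": {"metadata": {"labels": {"logging-infra-fluentd": "true"}}},
        "node2": {"metadata": {"labels": {}}},
    }
    fluentd_nodes = {
        name: node for name, node in nodes_by_name.items()
        if node['metadata']['labels'].get(label) == value
    }
    assert list(fluentd_nodes) == ["node1"]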
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
new file mode 100644
index 000000000..442f407b1
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -0,0 +1,229 @@
+"""
+Module for performing checks on a Kibana logging deployment
+"""
+
+import json
+import ssl
+
+try:
+ from urllib2 import HTTPError, URLError
+ import urllib2
+except ImportError:
+ from urllib.error import HTTPError, URLError
+ import urllib.request as urllib2
+
+from openshift_checks import get_var
+from openshift_checks.logging.logging import LoggingCheck
+
+
+class Kibana(LoggingCheck):
+ """Module that checks an integrated logging Kibana deployment"""
+
+ name = "kibana"
+ tags = ["health", "logging"]
+
+ logging_namespace = None
+
+ def run(self, tmp, task_vars):
+ """Check various things and gather errors. Returns: result as hash"""
+
+ self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
+ kibana_pods, error = super(Kibana, self).get_pods_for_component(
+ self.execute_module,
+ self.logging_namespace,
+ "kibana",
+ task_vars,
+ )
+ if error:
+ return {"failed": True, "changed": False, "msg": error}
+ check_error = self.check_kibana(kibana_pods)
+
+ if not check_error:
+ check_error = self._check_kibana_route(task_vars)
+
+ if check_error:
+ msg = ("The following Kibana deployment issue was found:"
+ "\n-------\n"
+ "{}".format(check_error))
+ return {"failed": True, "changed": False, "msg": msg}
+
+ # TODO(lmeyer): run it all again for the ops cluster
+ return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'}
+
+ def _verify_url_internal(self, url, task_vars):
+ """
+ Try to reach a URL from the master host.
+ Returns: error string on failure, None on success
+ """
+ args = dict(
+ url=url,
+ follow_redirects='none',
+ validate_certs='no', # likely to be signed with internal CA
+ # TODO(lmeyer): give users option to validate certs
+ status_code=302,
+ )
+ result = self.execute_module('uri', args, task_vars)
+ if result.get('failed'):
+ return result['msg']
+ return None
+
+ @staticmethod
+ def _verify_url_external(url):
+ """
+ Try to reach a URL from the Ansible control host.
+ Returns: error string on failure, None on success
+ """
+ # This actually checks from the ansible control host, which may or may not
+ # really be "external" to the cluster.
+
+ # Disable SSL cert validation to work around internally signed certs
+ ctx = ssl.create_default_context()
+ ctx.check_hostname = False # must be disabled first, or setting CERT_NONE below is refused
+ ctx.verify_mode = ssl.CERT_NONE
+
+ # Verify that the url is returning a valid response
+ try:
+ # We only care if the url connects and responds
+ return_code = urllib2.urlopen(url, context=ctx).getcode()
+ except HTTPError as httperr:
+ return httperr.reason
+ except URLError as urlerr:
+ return str(urlerr)
+
+ # there appears to be no way to prevent urlopen from following redirects
+ if return_code != 200:
+ return 'Expected success (200) but got return code {}'.format(int(return_code))
+
+ return None
+
+ def check_kibana(self, pods):
+ """Check to see if Kibana is up and working. Returns: error string."""
+
+ if not pods:
+ return "There are no Kibana pods deployed, so no access to the logging UI."
+
+ not_running = self.not_running_pods(pods)
+ if len(not_running) == len(pods):
+ return "No Kibana pod is in a running state, so there is no access to the logging UI."
+ elif not_running:
+ return (
+ "The following Kibana pods are not currently in a running state:\n"
+ "{pods}"
+ "However at least one is, so service may not be impacted."
+ ).format(pods="".join(" " + pod['metadata']['name'] + "\n" for pod in not_running))
+
+ return None
+
+ def _get_kibana_url(self, task_vars):
+ """
+ Get kibana route or report error.
+ Returns: url (or None), error key on failure
+ """
+
+ # Get logging url
+ get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars)
+ if not get_route:
+ return None, 'no_route_exists'
+
+ route = json.loads(get_route)
+
+ # check that the route has been accepted by a router
+ ingress = route["status"]["ingress"]
+ # ingress can be null if there is no router, or empty if not routed
+ if not ingress or not ingress[0]:
+ return None, 'route_not_accepted'
+
+ host = route.get("spec", {}).get("host")
+ if not host:
+ return None, 'route_missing_host'
+
+ return 'https://{}/'.format(host), None
+
+ def _check_kibana_route(self, task_vars):
+ """
+ Check to see if kibana route is up and working.
+ Returns: error string
+ """
+ known_errors = dict(
+ no_route_exists=(
+ 'No route is defined for Kibana in the logging namespace,\n'
+ 'so the logging stack is not accessible. Is logging deployed?\n'
+ 'Did something remove the logging-kibana route?'
+ ),
+ route_not_accepted=(
+ 'The logging-kibana route is not being routed by any router.\n'
+ 'Is the router deployed and working?'
+ ),
+ route_missing_host=(
+ 'The logging-kibana route has no hostname defined,\n'
+ 'which should never happen. Did something alter its definition?'
+ ),
+ )
+
+ kibana_url, error = self._get_kibana_url(task_vars)
+ if not kibana_url:
+ return known_errors.get(error, error)
+
+ # first, check that kibana is reachable from the master.
+ error = self._verify_url_internal(kibana_url, task_vars)
+ if error:
+ if 'urlopen error [Errno 111] Connection refused' in error:
+ error = (
+ 'Failed to connect from this master to Kibana URL {url}\n'
+ 'Is kibana running, and is at least one router routing to it?'
+ ).format(url=kibana_url)
+ elif 'urlopen error [Errno -2] Name or service not known' in error:
+ error = (
+ 'Failed to connect from this master to Kibana URL {url}\n'
+ 'because the hostname does not resolve.\n'
+ 'Is DNS configured for the Kibana hostname?'
+ ).format(url=kibana_url)
+ elif 'Status code was not' in error:
+ error = (
+ 'A request from this master to the Kibana URL {url}\n'
+ 'did not return the correct status code (302).\n'
+ 'This could mean that Kibana is malfunctioning, the hostname is\n'
+ 'resolving incorrectly, or other network issues. The output was:\n'
+ ' {error}'
+ ).format(url=kibana_url, error=error)
+ return 'Error validating the logging Kibana route:\n' + error
+
+ # in production we would like the kibana route to work from outside the
+ # cluster too; but that may not be the case, so allow disabling just this part.
+ if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True):
+ return None
+ error = self._verify_url_external(kibana_url)
+ if error:
+ if 'urlopen error [Errno 111] Connection refused' in error:
+ error = (
+ 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
+ 'Is the router for the Kibana hostname exposed externally?'
+ ).format(url=kibana_url)
+ elif 'urlopen error [Errno -2] Name or service not known' in error:
+ error = (
+ 'Failed to resolve the Kibana hostname in {url}\n'
+ 'from the Ansible control host.\n'
+ 'Is DNS configured to resolve this Kibana hostname externally?'
+ ).format(url=kibana_url)
+ elif 'Expected success (200)' in error:
+ error = (
+ 'A request to Kibana at {url}\n'
+ 'returned the wrong error code:\n'
+ ' {error}\n'
+ 'This could mean that Kibana is malfunctioning, the hostname is\n'
+ 'resolving incorrectly, or other network issues.'
+ ).format(url=kibana_url, error=error)
+ error = (
+ 'Error validating the logging Kibana route:\n{error}\n'
+ 'To disable external Kibana route validation, set in your inventory:\n'
+ ' openshift_check_efk_kibana_external=False'
+ ).format(error=error)
+ return error
+ return None
+
+ def _exec_oc(self, cmd_str, extra_args, task_vars):
+ return super(Kibana, self).exec_oc(self.execute_module,
+ self.logging_namespace,
+ cmd_str,
+ extra_args,
+ task_vars)
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
new file mode 100644
index 000000000..05b4d300c
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -0,0 +1,96 @@
+"""
+Util functions for performing checks on an Elasticsearch, Fluentd, and Kibana stack
+"""
+
+import json
+import os
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+
+
+class LoggingCheck(OpenShiftCheck):
+ """Base class for logging component checks"""
+
+ name = "logging"
+
+ @classmethod
+ def is_active(cls, task_vars):
+ return super(LoggingCheck, cls).is_active(task_vars) and cls.is_first_master(task_vars)
+
+ @staticmethod
+ def is_first_master(task_vars):
+ """Run only on first master and only when logging is configured. Returns: bool"""
+ logging_deployed = get_var(task_vars, "openshift_hosted_logging_deploy", default=True)
+ # Note: It would be nice to use membership in oo_first_master group, however for now it
+ # seems best to avoid requiring that setup and just check this is the first master.
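+ # falling back to a list sentinel ensures an empty hostname value never matches masters[0]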
+ hostname = get_var(task_vars, "ansible_ssh_host") or [None]
+ masters = get_var(task_vars, "groups", "masters", default=None) or [None]
+ return logging_deployed and masters[0] == hostname
+
+ def run(self, tmp, task_vars):
+ pass
+
+ def get_pods_for_component(self, execute_module, namespace, logging_component, task_vars):
+ """Get all pods for a given component. Returns: list of pods for component, error string"""
+ pod_output = self.exec_oc(
+ execute_module,
+ namespace,
+ "get pods -l component={} -o json".format(logging_component),
+ [],
+ task_vars
+ )
+ try:
+ pods = json.loads(pod_output)
+ if not pods or not pods.get('items'):
+ raise ValueError()
+ except ValueError:
+ # the command succeeded but the output did not parse; generally this means there were no pods in the namespace
+ return None, 'There are no pods in the {} namespace. Is logging deployed?'.format(namespace)
+
+ return pods['items'], None
+
+ @staticmethod
+ def not_running_pods(pods):
+ """Returns: list of pods not in a ready and running state"""
+ return [
+ pod for pod in pods
+ if any(
+ container['ready'] is False
+ for container in pod['status']['containerStatuses']
+ ) or not any(
+ condition['type'] == 'Ready' and condition['status'] == 'True'
+ for condition in pod['status']['conditions']
+ )
+ ]
+
+ @staticmethod
+ def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None):
+ """
+ Execute an 'oc' command on the remote host within the given namespace.
+ Returns: output of the command,
+ or raises OpenShiftCheckException on error
+ """
+ config_base = get_var(task_vars, "openshift", "common", "config_base")
+ args = {
+ "namespace": namespace,
+ "config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
+ "cmd": cmd_str,
+ "extra_args": list(extra_args) if extra_args else [],
+ }
+
+ result = execute_module("ocutil", args, task_vars)
+ if result.get("failed"):
+ msg = (
+ 'Unexpected error using `oc` to validate the logging stack components.\n'
+ 'Error executing `oc {cmd}`:\n'
+ '{error}'
+ ).format(cmd=args['cmd'], error=result['result'])
+
+ if result['result'] == '[Errno 2] No such file or directory':
+ msg = (
+ "This host is supposed to be a master but does not have the `oc` command where expected.\n"
+ "Has an installation been run on this host yet?"
+ )
+ raise OpenShiftCheckException(msg)
+
+ return result.get("result", "")
diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py
new file mode 100644
index 000000000..1e45ae3af
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py
@@ -0,0 +1,78 @@
+"""
+Ansible module for determining if an installed version of Open vSwitch is incompatible with the
+currently installed version of OpenShift.
+"""
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks.mixins import NotContainerizedMixin
+
+
+class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
+ """Check that packages in a package_list are installed on the host
+ and are the correct version as determined by an OpenShift installation.
+ """
+
+ name = "ovs_version"
+ tags = ["health"]
+
+ openshift_to_ovs_version = {
+ "3.6": "2.6",
+ "3.5": "2.6",
+ "3.4": "2.4",
+ }
+
+ # map major release versions across releases
+ # to a common major version
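+ # e.g. an origin "1.5" image tag is treated as "3.5"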
+ openshift_major_release_version = {
+ "1": "3",
+ }
+
+ @classmethod
+ def is_active(cls, task_vars):
+ """Skip hosts that do not have package requirements."""
+ group_names = get_var(task_vars, "group_names", default=[])
+ master_or_node = 'masters' in group_names or 'nodes' in group_names
+ return super(OvsVersion, cls).is_active(task_vars) and master_or_node
+
+ def run(self, tmp, task_vars):
+ args = {
+ "package_list": [
+ {
+ "name": "openvswitch",
+ "version": self.get_required_ovs_version(task_vars),
+ },
+ ],
+ }
+ return self.execute_module("rpm_version", args, task_vars)
+
+ def get_required_ovs_version(self, task_vars):
+ """Return the correct Open vSwitch version for the current OpenShift version"""
+ openshift_version = self._get_openshift_version(task_vars)
+
+ if float(openshift_version) < 3.5:
+ return self.openshift_to_ovs_version["3.4"]
+
+ ovs_version = self.openshift_to_ovs_version.get(str(openshift_version))
+ if ovs_version:
+ return ovs_version
+
+ msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
+ raise OpenShiftCheckException(msg.format(openshift_version))
+
+ def _get_openshift_version(self, task_vars):
+ openshift_version = get_var(task_vars, "openshift_image_tag")
+ if openshift_version and openshift_version[0] == 'v':
+ openshift_version = openshift_version[1:]
+
+ return self._parse_version(openshift_version)
+
+ def _parse_version(self, version):
+ components = version.split(".")
+ if not components or len(components) < 2:
+ msg = "An invalid version of OpenShift was found for this host: {}"
+ raise OpenShiftCheckException(msg.format(version))
+
+ if components[0] in self.openshift_major_release_version:
+ components[0] = self.openshift_major_release_version[components[0]]
+
+ return '.'.join(components[:2])
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 682f6bd40..2e737818b 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -1,5 +1,5 @@
# pylint: disable=missing-docstring
-from openshift_checks import OpenShiftCheck, get_var
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
from openshift_checks.mixins import NotContainerizedMixin
@@ -9,6 +9,25 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
name = "package_version"
tags = ["preflight"]
+ openshift_to_ovs_version = {
+ "3.6": "2.6",
+ "3.5": "2.6",
+ "3.4": "2.4",
+ }
+
+ openshift_to_docker_version = {
+ "3.1": "1.8",
+ "3.2": "1.10",
+ "3.3": "1.10",
+ "3.4": "1.12",
+ }
+
+ # map major release versions across releases
+ # to a common major version
+ openshift_major_release_version = {
+ "1": "3",
+ }
+
@classmethod
def is_active(cls, task_vars):
"""Skip hosts that do not have package requirements."""
@@ -17,9 +36,90 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
return super(PackageVersion, cls).is_active(task_vars) and master_or_node
def run(self, tmp, task_vars):
+ rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
+ openshift_release = get_var(task_vars, "openshift_release", default='')
+ deployment_type = get_var(task_vars, "openshift_deployment_type")
+ check_multi_minor_release = deployment_type in ['openshift-enterprise']
+
args = {
- "requested_openshift_release": get_var(task_vars, "openshift_release", default=''),
- "openshift_deployment_type": get_var(task_vars, "openshift_deployment_type"),
- "rpm_prefix": get_var(task_vars, "openshift", "common", "service_type"),
+ "package_list": [
+ {
+ "name": "openvswitch",
+ "version": self.get_required_ovs_version(task_vars),
+ "check_multi": False,
+ },
+ {
+ "name": "docker",
+ "version": self.get_required_docker_version(task_vars),
+ "check_multi": False,
+ },
+ {
+ "name": "{}".format(rpm_prefix),
+ "version": openshift_release,
+ "check_multi": check_multi_minor_release,
+ },
+ {
+ "name": "{}-master".format(rpm_prefix),
+ "version": openshift_release,
+ "check_multi": check_multi_minor_release,
+ },
+ {
+ "name": "{}-node".format(rpm_prefix),
+ "version": openshift_release,
+ "check_multi": check_multi_minor_release,
+ },
+ ],
}
+
return self.execute_module("aos_version", args, tmp, task_vars)
+
+ def get_required_ovs_version(self, task_vars):
+ """Return the correct Open vSwitch version for the current OpenShift version.
+ If the current OpenShift version is 3.5 or newer, require Open vSwitch 2.6;
+ otherwise require Open vSwitch 2.4."""
+ openshift_version = self.get_openshift_version(task_vars)
+
+ if float(openshift_version) < 3.5:
+ return self.openshift_to_ovs_version["3.4"]
+
+ ovs_version = self.openshift_to_ovs_version.get(str(openshift_version))
+ if ovs_version:
+ return ovs_version
+
+ msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}"
+ raise OpenShiftCheckException(msg.format(openshift_version))
+
+ def get_required_docker_version(self, task_vars):
+ """Return the correct Docker version for the current OpenShift version.
+ If the OpenShift version is 3.1, ensure Docker version 1.8.
+ If the OpenShift version is 3.2 or 3.3, ensure Docker version 1.10.
+ If the current OpenShift version is >= 3.4, ensure Docker version 1.12."""
+ openshift_version = self.get_openshift_version(task_vars)
+
+ if float(openshift_version) >= 3.4:
+ return self.openshift_to_docker_version["3.4"]
+
+ docker_version = self.openshift_to_docker_version.get(str(openshift_version))
+ if docker_version:
+ return docker_version
+
+ msg = "There is no recommended version of Docker for the current version of OpenShift: {}"
+ raise OpenShiftCheckException(msg.format(openshift_version))
+
+ def get_openshift_version(self, task_vars):
+ openshift_version = get_var(task_vars, "openshift_image_tag")
+ if openshift_version and openshift_version[0] == 'v':
+ openshift_version = openshift_version[1:]
+
+ return self.parse_version(openshift_version)
+
+ def parse_version(self, version):
+ components = version.split(".")
+ if not components or len(components) < 2:
+ msg = "An invalid version of OpenShift was found for this host: {}"
+ raise OpenShiftCheckException(msg.format(version))
+
+ if components[0] in self.openshift_major_release_version:
+ components[0] = self.openshift_major_release_version[components[0]]
+
+ return '.'.join(components[:2])
diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py
index 39c86067a..697805dd2 100644
--- a/roles/openshift_health_checker/test/aos_version_test.py
+++ b/roles/openshift_health_checker/test/aos_version_test.py
@@ -4,89 +4,118 @@ import aos_version
from collections import namedtuple
Package = namedtuple('Package', ['name', 'version'])
-expected_pkgs = set(['spam', 'eggs'])
+expected_pkgs = {
+ "spam": {
+ "name": "spam",
+ "version": "3.2.1",
+ "check_multi": False,
+ },
+ "eggs": {
+ "name": "eggs",
+ "version": "3.2.1",
+ "check_multi": False,
+ },
+}
-@pytest.mark.parametrize('pkgs, requested_release, expect_not_found', [
+@pytest.mark.parametrize('pkgs, expect_not_found', [
(
[],
- '3.2.1',
- expected_pkgs, # none found
+ {
+ "spam": {
+ "name": "spam",
+ "version": "3.2.1",
+ "check_multi": False,
+ },
+ "eggs": {
+ "name": "eggs",
+ "version": "3.2.1",
+ "check_multi": False,
+ }
+ }, # none found
),
(
[Package('spam', '3.2.1')],
- '3.2',
- ['eggs'], # completely missing
+ {
+ "eggs": {
+ "name": "eggs",
+ "version": "3.2.1",
+ "check_multi": False,
+ }
+ }, # completely missing
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.3.2')],
- '3.2',
- ['eggs'], # not the right version
+ {
+ "eggs": {
+ "name": "eggs",
+ "version": "3.2.1",
+ "check_multi": False,
+ }
+ }, # not the right version
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
- '3.2',
- [], # all found
+ {}, # all found
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
- '3.2.1',
- [], # found with more specific version
+ {}, # found with more specific version
),
(
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')],
- '3.2.1',
- ['spam'], # eggs found with multiple versions
+ {
+ "spam": {
+ "name": "spam",
+ "version": "3.2.1",
+ "check_multi": False,
+ }
+ }, # eggs found with multiple versions
),
])
-def test_check_pkgs_for_precise_version(pkgs, requested_release, expect_not_found):
+def test_check_pkgs_for_precise_version(pkgs, expect_not_found):
if expect_not_found:
with pytest.raises(aos_version.PreciseVersionNotFound) as e:
- aos_version._check_precise_version_found(pkgs, expected_pkgs, requested_release)
- assert set(expect_not_found) == set(e.value.problem_pkgs)
+ aos_version._check_precise_version_found(pkgs, expected_pkgs)
+
+ assert list(expect_not_found.values()) == e.value.problem_pkgs
else:
- aos_version._check_precise_version_found(pkgs, expected_pkgs, requested_release)
+ aos_version._check_precise_version_found(pkgs, expected_pkgs)
-@pytest.mark.parametrize('pkgs, requested_release, expect_higher', [
+@pytest.mark.parametrize('pkgs, expect_higher', [
(
[],
- '3.2.1',
[],
),
(
- [Package('spam', '3.2.1')],
- '3.2',
+ [Package('spam', '3.2.1.9')],
[], # more precise but not strictly higher
),
(
[Package('spam', '3.3')],
- '3.2.1',
['spam-3.3'], # lower precision, but higher
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.3.2')],
- '3.2',
['eggs-3.3.2'], # one too high
),
(
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')],
- '3.2.1',
['eggs-3.4'], # multiple versions, one is higher
),
(
[Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')],
- '3.2.1',
['eggs-3.4'], # multiple versions, two are higher
),
])
-def test_check_pkgs_for_greater_version(pkgs, requested_release, expect_higher):
+def test_check_pkgs_for_greater_version(pkgs, expect_higher):
if expect_higher:
with pytest.raises(aos_version.FoundHigherVersion) as e:
- aos_version._check_higher_version_found(pkgs, expected_pkgs, requested_release)
+ aos_version._check_higher_version_found(pkgs, expected_pkgs)
assert set(expect_higher) == set(e.value.problem_pkgs)
else:
- aos_version._check_higher_version_found(pkgs, expected_pkgs, requested_release)
+ aos_version._check_higher_version_found(pkgs, expected_pkgs)
@pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [
diff --git a/roles/openshift_health_checker/test/curator_test.py b/roles/openshift_health_checker/test/curator_test.py
new file mode 100644
index 000000000..ae108c96e
--- /dev/null
+++ b/roles/openshift_health_checker/test/curator_test.py
@@ -0,0 +1,68 @@
+import pytest
+
+from openshift_checks.logging.curator import Curator
+
+
+def canned_curator(exec_oc=None):
+ """Create a Curator check object with canned exec_oc method"""
+ check = Curator("dummy") # fails if a module is actually invoked
+ if exec_oc:
+ check._exec_oc = exec_oc
+ return check
+
+
+def assert_error(error, expect_error):
+ if expect_error:
+ assert error
+ assert expect_error in error
+ else:
+ assert not error
+
+
+plain_curator_pod = {
+ "metadata": {
+ "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+ "name": "logging-curator-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "podIP": "10.10.10.10",
+ }
+}
+
+not_running_curator_pod = {
+ "metadata": {
+ "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+ "name": "logging-curator-2",
+ },
+ "status": {
+ "containerStatuses": [{"ready": False}],
+ "conditions": [{"status": "False", "type": "Ready"}],
+ "podIP": "10.10.10.10",
+ }
+}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+ (
+ [],
+ "no Curator pods",
+ ),
+ (
+ [plain_curator_pod],
+ None,
+ ),
+ (
+ [not_running_curator_pod],
+ "not currently in a running state",
+ ),
+ (
+ [plain_curator_pod, plain_curator_pod],
+ "more than one Curator pod",
+ ),
+])
+def test_get_curator_pods(pods, expect_error):
+ check = canned_curator()
+ error = check.check_curator(pods)
+ assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py
new file mode 100644
index 000000000..b9d375d8c
--- /dev/null
+++ b/roles/openshift_health_checker/test/elasticsearch_test.py
@@ -0,0 +1,180 @@
+import pytest
+import json
+
+from openshift_checks.logging.elasticsearch import Elasticsearch
+
+task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
+
+
+def canned_elasticsearch(exec_oc=None):
+ """Create an Elasticsearch check object with canned exec_oc method"""
+ check = Elasticsearch("dummy") # fails if a module is actually invoked
+ if exec_oc:
+ check._exec_oc = exec_oc
+ return check
+
+
+def assert_error(error, expect_error):
+ if expect_error:
+ assert error
+ assert expect_error in error
+ else:
+ assert not error
+
+
+plain_es_pod = {
+ "metadata": {
+ "labels": {"component": "es", "deploymentconfig": "logging-es"},
+ "name": "logging-es",
+ },
+ "status": {
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "containerStatuses": [{"ready": True}],
+ "podIP": "10.10.10.10",
+ },
+ "_test_master_name_str": "name logging-es",
+}
+
+split_es_pod = {
+ "metadata": {
+ "labels": {"component": "es", "deploymentconfig": "logging-es-2"},
+ "name": "logging-es-2",
+ },
+ "status": {
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "containerStatuses": [{"ready": True}],
+ "podIP": "10.10.10.10",
+ },
+ "_test_master_name_str": "name logging-es-2",
+}
+
+
+def test_check_elasticsearch():
+ assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([], {})
+
+ # canned oc responses to match so all the checks pass
+ def _exec_oc(cmd, args, task_vars):
+ if '_cat/master' in cmd:
+ return 'name logging-es'
+ elif '/_nodes' in cmd:
+ return json.dumps(es_node_list)
+ elif '_cluster/health' in cmd:
+ return '{"status": "green"}'
+ elif ' df ' in cmd:
+ return 'IUse% Use%\n 3% 4%\n'
+ else:
+ raise Exception(cmd)
+
+ assert not canned_elasticsearch(_exec_oc).check_elasticsearch([plain_es_pod], {})
+
+
+def pods_by_name(pods):
+ return {pod['metadata']['name']: pod for pod in pods}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+ (
+ [],
+ 'No logging Elasticsearch masters',
+ ),
+ (
+ [plain_es_pod],
+ None,
+ ),
+ (
+ [plain_es_pod, split_es_pod],
+ 'Found multiple Elasticsearch masters',
+ ),
+])
+def test_check_elasticsearch_masters(pods, expect_error):
+ test_pods = list(pods)
+ check = canned_elasticsearch(lambda cmd, args, task_vars: test_pods.pop(0)['_test_master_name_str'])
+
+ errors = check._check_elasticsearch_masters(pods_by_name(pods), task_vars_config_base)
+ assert_error(''.join(errors), expect_error)
+
+
+es_node_list = {
+ 'nodes': {
+ 'random-es-name': {
+ 'host': 'logging-es',
+ }}}
+
+
+@pytest.mark.parametrize('pods, node_list, expect_error', [
+ (
+ [],
+ {},
+ 'No logging Elasticsearch masters',
+ ),
+ (
+ [plain_es_pod],
+ es_node_list,
+ None,
+ ),
+ (
+ [plain_es_pod],
+ {}, # empty list of nodes triggers KeyError
+ "Failed to query",
+ ),
+ (
+ [split_es_pod],
+ es_node_list,
+ 'does not correspond to any known ES pod',
+ ),
+])
+def test_check_elasticsearch_node_list(pods, node_list, expect_error):
+ check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(node_list))
+
+ errors = check._check_elasticsearch_node_list(pods_by_name(pods), task_vars_config_base)
+ assert_error(''.join(errors), expect_error)
+
+
+@pytest.mark.parametrize('pods, health_data, expect_error', [
+ (
+ [plain_es_pod],
+ [{"status": "green"}],
+ None,
+ ),
+ (
+ [plain_es_pod],
+ [{"no-status": "should bomb"}],
+ 'Could not retrieve cluster health status',
+ ),
+ (
+ [plain_es_pod, split_es_pod],
+ [{"status": "green"}, {"status": "red"}],
+ 'Elasticsearch cluster health status is RED',
+ ),
+])
+def test_check_elasticsearch_cluster_health(pods, health_data, expect_error):
+ test_health_data = list(health_data)
+ check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(test_health_data.pop(0)))
+
+ errors = check._check_es_cluster_health(pods_by_name(pods), task_vars_config_base)
+ assert_error(''.join(errors), expect_error)
+
+
+@pytest.mark.parametrize('disk_data, expect_error', [
+ (
+ 'df: /elasticsearch/persistent: No such file or directory\n',
+ 'Could not retrieve storage usage',
+ ),
+ (
+ 'IUse% Use%\n 3% 4%\n',
+ None,
+ ),
+ (
+ 'IUse% Use%\n 95% 40%\n',
+ 'Inode percent usage on the storage volume',
+ ),
+ (
+ 'IUse% Use%\n 3% 94%\n',
+ 'Disk percent usage on the storage volume',
+ ),
+])
+def test_check_elasticsearch_diskspace(disk_data, expect_error):
+ check = canned_elasticsearch(lambda cmd, args, task_vars: disk_data)
+
+ errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]), task_vars_config_base)
+ assert_error(''.join(errors), expect_error)
diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py
new file mode 100644
index 000000000..d151c0b19
--- /dev/null
+++ b/roles/openshift_health_checker/test/fluentd_test.py
@@ -0,0 +1,109 @@
+import pytest
+import json
+
+from openshift_checks.logging.fluentd import Fluentd
+
+
+def canned_fluentd(exec_oc=None):
+ """Create a Fluentd check object with canned exec_oc method"""
+ check = Fluentd("dummy") # fails if a module is actually invoked
+ if exec_oc:
+ check._exec_oc = exec_oc
+ return check
+
+
+def assert_error(error, expect_error):
+ if expect_error:
+ assert error
+ assert expect_error in error
+ else:
+ assert not error
+
+
+fluentd_pod_node1 = {
+ "metadata": {
+ "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+ "name": "logging-fluentd-1",
+ },
+ "spec": {"host": "node1", "nodeName": "node1"},
+ "status": {
+ "containerStatuses": [{"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+fluentd_pod_node2_down = {
+ "metadata": {
+ "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+ "name": "logging-fluentd-2",
+ },
+ "spec": {"host": "node2", "nodeName": "node2"},
+ "status": {
+ "containerStatuses": [{"ready": False}],
+ "conditions": [{"status": "False", "type": "Ready"}],
+ }
+}
+fluentd_node1 = {
+ "metadata": {
+ "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "node1"},
+ "name": "node1",
+ },
+ "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.1"}]},
+}
+fluentd_node2 = {
+ "metadata": {
+ "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "hostname"},
+ "name": "node2",
+ },
+ "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.2"}]},
+}
+fluentd_node3_unlabeled = {
+ "metadata": {
+ "labels": {"kubernetes.io/hostname": "hostname"},
+ "name": "node3",
+ },
+ "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.3"}]},
+}
+
+
+@pytest.mark.parametrize('pods, nodes, expect_error', [
+ (
+ [],
+ [],
+ 'No nodes appear to be defined',
+ ),
+ (
+ [],
+ [fluentd_node3_unlabeled],
+ 'There are no nodes with the fluentd label',
+ ),
+ (
+ [],
+ [fluentd_node1, fluentd_node3_unlabeled],
+ 'Fluentd will not aggregate logs from these nodes.',
+ ),
+ (
+ [],
+ [fluentd_node2],
+ "nodes are supposed to have a Fluentd pod but do not",
+ ),
+ (
+ [fluentd_pod_node1, fluentd_pod_node1],
+ [fluentd_node1],
+ 'more Fluentd pods running than nodes labeled',
+ ),
+ (
+ [fluentd_pod_node2_down],
+ [fluentd_node2],
+ "Fluentd pods are supposed to be running",
+ ),
+ (
+ [fluentd_pod_node1],
+ [fluentd_node1],
+ None,
+ ),
+])
+def test_get_fluentd_pods(pods, nodes, expect_error):
+ check = canned_fluentd(lambda cmd, args, task_vars: json.dumps(dict(items=nodes)))
+
+ error = check.check_fluentd(pods, {})
+ assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
new file mode 100644
index 000000000..19140a1b6
--- /dev/null
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -0,0 +1,218 @@
+import pytest
+import json
+
+try:
+ import urllib2
+ from urllib2 import HTTPError, URLError
+except ImportError:
+ from urllib.error import HTTPError, URLError
+ import urllib.request as urllib2
+
+from openshift_checks.logging.kibana import Kibana
+
+
+def canned_kibana(exec_oc=None):
+ """Create a Kibana check object with canned exec_oc method"""
+ check = Kibana("dummy") # fails if a module is actually invoked
+ if exec_oc:
+ check._exec_oc = exec_oc
+ return check
+
+
+def assert_error(error, expect_error):
+ if expect_error:
+ assert error
+ assert expect_error in error
+ else:
+ assert not error
+
+
+plain_kibana_pod = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+not_running_kibana_pod = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-2",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": False}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+ (
+ [],
+ "There are no Kibana pods deployed",
+ ),
+ (
+ [plain_kibana_pod],
+ None,
+ ),
+ (
+ [not_running_kibana_pod],
+ "No Kibana pod is in a running state",
+ ),
+ (
+ [plain_kibana_pod, not_running_kibana_pod],
+ "The following Kibana pods are not currently in a running state",
+ ),
+])
+def test_check_kibana(pods, expect_error):
+ check = canned_kibana()
+ error = check.check_kibana(pods)
+ assert_error(error, expect_error)
+
+
+@pytest.mark.parametrize('route, expect_url, expect_error', [
+ (
+ None,
+ None,
+ 'no_route_exists',
+ ),
+
+ # test route with no ingress
+ (
+ {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana",
+ },
+ "status": {
+ "ingress": [],
+ },
+ "spec": {
+ "host": "hostname",
+ }
+ },
+ None,
+ 'route_not_accepted',
+ ),
+
+ # test route with no host
+ (
+ {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana",
+ },
+ "status": {
+ "ingress": [{
+ "status": True,
+ }],
+ },
+ "spec": {},
+ },
+ None,
+ 'route_missing_host',
+ ),
+
+ # test route that looks fine
+ (
+ {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana",
+ },
+ "status": {
+ "ingress": [{
+ "status": True,
+ }],
+ },
+ "spec": {
+ "host": "hostname",
+ },
+ },
+ "https://hostname/",
+ None,
+ ),
+])
+def test_get_kibana_url(route, expect_url, expect_error):
+ check = canned_kibana(lambda cmd, args, task_vars: json.dumps(route) if route else "")
+
+ url, error = check._get_kibana_url({})
+ if expect_url:
+ assert url == expect_url
+ else:
+ assert not url
+ if expect_error:
+ assert error == expect_error
+ else:
+ assert not error
+
+
+@pytest.mark.parametrize('exec_result, expect', [
+ (
+ 'urlopen error [Errno 111] Connection refused',
+ 'at least one router routing to it?',
+ ),
+ (
+ 'urlopen error [Errno -2] Name or service not known',
+ 'DNS configured for the Kibana hostname?',
+ ),
+ (
+ 'Status code was not [302]: HTTP Error 500: Server error',
+ 'did not return the correct status code',
+ ),
+ (
+ 'bork bork bork',
+ 'bork bork bork', # should pass through
+ ),
+])
+def test_verify_url_internal_failure(exec_result, expect):
+ check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result))
+ check._get_kibana_url = lambda task_vars: ('url', None)
+
+ error = check._check_kibana_route({})
+ assert_error(error, expect)
+
+
+@pytest.mark.parametrize('lib_result, expect', [
+ (
+ HTTPError('url', 500, "it broke", hdrs=None, fp=None),
+ 'it broke',
+ ),
+ (
+ URLError('it broke'),
+ 'it broke',
+ ),
+ (
+ 302,
+ 'returned the wrong error code',
+ ),
+ (
+ 200,
+ None,
+ ),
+])
+def test_verify_url_external_failure(lib_result, expect, monkeypatch):
+
+ class _http_return:
+
+ def __init__(self, code):
+ self.code = code
+
+ def getcode(self):
+ return self.code
+
+ def urlopen(url, context):
+ if type(lib_result) is int:
+ return _http_return(lib_result)
+ raise lib_result
+ monkeypatch.setattr(urllib2, 'urlopen', urlopen)
+
+ check = canned_kibana()
+ check._get_kibana_url = lambda task_vars: ('url', None)
+ check._verify_url_internal = lambda url, task_vars: None
+
+ error = check._check_kibana_route({})
+ assert_error(error, expect)
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
new file mode 100644
index 000000000..b6db34fe3
--- /dev/null
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -0,0 +1,137 @@
+import pytest
+import json
+
+from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
+
+task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
+
+
+logging_namespace = "logging"
+
+
+def canned_loggingcheck(exec_oc=None):
+ """Create a LoggingCheck object with canned exec_oc method"""
+ check = LoggingCheck("dummy") # fails if a module is actually invoked
+ check.logging_namespace = 'logging'
+ if exec_oc:
+ check.exec_oc = exec_oc
+ return check
+
+
+def assert_error(error, expect_error):
+ if expect_error:
+ assert error
+ assert expect_error in error
+ else:
+ assert not error
+
+
+plain_es_pod = {
+ "metadata": {
+ "labels": {"component": "es", "deploymentconfig": "logging-es"},
+ "name": "logging-es",
+ },
+ "status": {
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "containerStatuses": [{"ready": True}],
+ "podIP": "10.10.10.10",
+ },
+ "_test_master_name_str": "name logging-es",
+}
+
+plain_kibana_pod = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}, {"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+
+fluentd_pod_node1 = {
+ "metadata": {
+ "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+ "name": "logging-fluentd-1",
+ },
+ "spec": {"host": "node1", "nodeName": "node1"},
+ "status": {
+ "containerStatuses": [{"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+
+plain_curator_pod = {
+ "metadata": {
+ "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+ "name": "logging-curator-1",
+ },
+ "status": {
+ "containerStatuses": [{"ready": True}],
+ "conditions": [{"status": "True", "type": "Ready"}],
+ "podIP": "10.10.10.10",
+ }
+}
+
+
+@pytest.mark.parametrize('problem, expect', [
+ ("[Errno 2] No such file or directory", "supposed to be a master"),
+ ("Permission denied", "Unexpected error using `oc`"),
+])
+def test_oc_failure(problem, expect):
+ def execute_module(module_name, args, task_vars):
+ if module_name == "ocutil":
+ return dict(failed=True, result=problem)
+ return dict(changed=False)
+
+ check = LoggingCheck({})
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.exec_oc(execute_module, logging_namespace, 'get foo', [], task_vars=task_vars_config_base)
+ assert expect in str(excinfo)
+
+
+groups_with_first_master = dict(masters=['this-host', 'other-host'])
+groups_with_second_master = dict(masters=['other-host', 'this-host'])
+groups_not_a_master = dict(masters=['other-host'])
+
+
+@pytest.mark.parametrize('groups, logging_deployed, is_active', [
+ (groups_with_first_master, True, True),
+ (groups_with_first_master, False, False),
+ (groups_not_a_master, True, False),
+ (groups_with_second_master, True, False),
+])
+def test_is_active(groups, logging_deployed, is_active):
+ task_vars = dict(
+ ansible_ssh_host='this-host',
+ groups=groups,
+ openshift_hosted_logging_deploy=logging_deployed,
+ )
+
+ assert LoggingCheck.is_active(task_vars=task_vars) == is_active
+
+
+@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [
+ (
+ 'No resources found.',
+ None,
+ 'There are no pods in the logging namespace',
+ ),
+ (
+ json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}),
+ [plain_es_pod],
+ None,
+ ),
+])
+def test_get_pods_for_component(pod_output, expect_pods, expect_error):
+ check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: pod_output)
+ pods, error = check.get_pods_for_component(
+ lambda name, args, task_vars: {},
+ logging_namespace,
+ "es",
+ {}
+ )
+ assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py
new file mode 100644
index 000000000..6494e1c06
--- /dev/null
+++ b/roles/openshift_health_checker/test/ovs_version_test.py
@@ -0,0 +1,89 @@
+import pytest
+
+from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException
+
+
+def test_openshift_version_not_supported():
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ return {}
+
+ openshift_release = '111.7.0'
+
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
+ openshift_deployment_type='origin',
+ )
+
+ check = OvsVersion(execute_module=execute_module)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run(tmp=None, task_vars=task_vars)
+
+ assert "no recommended version of Open vSwitch" in str(excinfo.value)
+
+
+def test_invalid_openshift_release_format():
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ return {}
+
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_image_tag='v0',
+ openshift_deployment_type='origin',
+ )
+
+ check = OvsVersion(execute_module=execute_module)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run(tmp=None, task_vars=task_vars)
+ assert "invalid version" in str(excinfo.value)
+
+
+@pytest.mark.parametrize('openshift_release,expected_ovs_version', [
+ ("3.5", "2.6"),
+ ("3.6", "2.6"),
+ ("3.4", "2.4"),
+ ("3.3", "2.4"),
+ ("1.0", "2.4"),
+])
+def test_ovs_package_version(openshift_release, expected_ovs_version):
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
+ )
+ return_value = object()
+
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ assert module_name == 'rpm_version'
+ assert "package_list" in module_args
+
+ for pkg in module_args["package_list"]:
+ if pkg["name"] == "openvswitch":
+ assert pkg["version"] == expected_ovs_version
+
+ return return_value
+
+ check = OvsVersion(execute_module=execute_module)
+ result = check.run(tmp=None, task_vars=task_vars)
+ assert result is return_value
+
+
+@pytest.mark.parametrize('group_names,is_containerized,is_active', [
+ (['masters'], False, True),
+ # ensure check is skipped on containerized installs
+ (['masters'], True, False),
+ (['nodes'], False, True),
+ (['masters', 'nodes'], False, True),
+ (['masters', 'etcd'], False, True),
+ ([], False, False),
+ (['etcd'], False, False),
+ (['lb'], False, False),
+ (['nfs'], False, False),
+])
+def test_ovs_version_skip_when_not_master_nor_node(group_names, is_containerized, is_active):
+ task_vars = dict(
+ group_names=group_names,
+ openshift=dict(common=dict(is_containerized=is_containerized)),
+ )
+ assert OvsVersion.is_active(task_vars=task_vars) == is_active
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 196d9816a..91eace512 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -1,24 +1,132 @@
import pytest
-from openshift_checks.package_version import PackageVersion
+from openshift_checks.package_version import PackageVersion, OpenShiftCheckException
-def test_package_version():
+@pytest.mark.parametrize('openshift_release, extra_words', [
+ ('111.7.0', ["no recommended version of Open vSwitch"]),
+ ('0.0.0', ["no recommended version of Docker"]),
+])
+def test_openshift_version_not_supported(openshift_release, extra_words):
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ return {}
+
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
+ openshift_deployment_type='origin',
+ )
+
+ check = PackageVersion(execute_module=execute_module)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run(tmp=None, task_vars=task_vars)
+
+ for word in extra_words:
+ assert word in str(excinfo.value)
+
+
+def test_invalid_openshift_release_format():
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ return {}
+
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_image_tag='v0',
+ openshift_deployment_type='origin',
+ )
+
+ check = PackageVersion(execute_module=execute_module)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run(tmp=None, task_vars=task_vars)
+ assert "invalid version" in str(excinfo.value)
+
+
+@pytest.mark.parametrize('openshift_release', [
+ "3.5",
+ "3.6",
+ "3.4",
+ "3.3",
+])
+def test_package_version(openshift_release):
task_vars = dict(
openshift=dict(common=dict(service_type='origin')),
- openshift_release='3.5',
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
openshift_deployment_type='origin',
)
return_value = object()
def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
assert module_name == 'aos_version'
- assert 'requested_openshift_release' in module_args
- assert 'openshift_deployment_type' in module_args
- assert 'rpm_prefix' in module_args
- assert module_args['requested_openshift_release'] == task_vars['openshift_release']
- assert module_args['openshift_deployment_type'] == task_vars['openshift_deployment_type']
- assert module_args['rpm_prefix'] == task_vars['openshift']['common']['service_type']
+ assert "package_list" in module_args
+
+ for pkg in module_args["package_list"]:
+ if "-master" in pkg["name"] or "-node" in pkg["name"]:
+ assert pkg["version"] == task_vars["openshift_release"]
+
+ return return_value
+
+ check = PackageVersion(execute_module=execute_module)
+ result = check.run(tmp=None, task_vars=task_vars)
+ assert result is return_value
+
+
+@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [
+ ("openshift-enterprise", "3.5", "2.6"),
+ ("origin", "3.6", "2.6"),
+ ("openshift-enterprise", "3.4", "2.4"),
+ ("origin", "3.3", "2.4"),
+])
+def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version):
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
+ openshift_deployment_type=deployment_type,
+ )
+ return_value = object()
+
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ assert module_name == 'aos_version'
+ assert "package_list" in module_args
+
+ for pkg in module_args["package_list"]:
+ if pkg["name"] == "openvswitch":
+ assert pkg["version"] == expected_ovs_version
+
+ return return_value
+
+ check = PackageVersion(execute_module=execute_module)
+ result = check.run(tmp=None, task_vars=task_vars)
+ assert result is return_value
+
+
+@pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
+ ("origin", "3.5", "1.12"),
+ ("openshift-enterprise", "3.4", "1.12"),
+ ("origin", "3.3", "1.10"),
+ ("openshift-enterprise", "3.2", "1.10"),
+ ("origin", "3.1", "1.8"),
+ ("openshift-enterprise", "3.1", "1.8"),
+])
+def test_docker_package_version(deployment_type, openshift_release, expected_docker_version):
+ task_vars = dict(
+ openshift=dict(common=dict(service_type='origin')),
+ openshift_release=openshift_release,
+ openshift_image_tag='v' + openshift_release,
+ openshift_deployment_type=deployment_type,
+ )
+ return_value = object()
+
+ def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
+ assert module_name == 'aos_version'
+ assert "package_list" in module_args
+
+ for pkg in module_args["package_list"]:
+ if pkg["name"] == "docker":
+ assert pkg["version"] == expected_docker_version
+
return return_value
check = PackageVersion(execute_module=execute_module)
diff --git a/roles/openshift_health_checker/test/rpm_version_test.py b/roles/openshift_health_checker/test/rpm_version_test.py
new file mode 100644
index 000000000..2f09ef965
--- /dev/null
+++ b/roles/openshift_health_checker/test/rpm_version_test.py
@@ -0,0 +1,82 @@
+import pytest
+import rpm_version
+
+expected_pkgs = {
+ "spam": {
+ "name": "spam",
+ "version": "3.2.1",
+ },
+ "eggs": {
+ "name": "eggs",
+ "version": "3.2.1",
+ },
+}
+
+
+@pytest.mark.parametrize('pkgs, expect_not_found', [
+ (
+ {},
+ ["spam", "eggs"], # none found
+ ),
+ (
+ {"spam": ["3.2.1", "4.5.1"]},
+ ["eggs"], # completely missing
+ ),
+ (
+ {
+ "spam": ["3.2.1", "4.5.1"],
+ "eggs": ["3.2.1"],
+ },
+ [], # all found
+ ),
+])
+def test_check_pkg_found(pkgs, expect_not_found):
+ if expect_not_found:
+ with pytest.raises(rpm_version.RpmVersionException) as e:
+ rpm_version._check_pkg_versions(pkgs, expected_pkgs)
+
+ assert "not found to be installed" in str(e.value)
+ assert set(expect_not_found) == set(e.value.problem_pkgs)
+ else:
+ rpm_version._check_pkg_versions(pkgs, expected_pkgs)
+
+
+@pytest.mark.parametrize('pkgs, expect_not_found', [
+ (
+ {
+ 'spam': ['3.2.1'],
+ 'eggs': ['3.3.2'],
+ },
+ {
+ "eggs": {
+ "required_version": "3.2",
+ "found_versions": ["3.3"],
+ }
+ }, # not the right version
+ ),
+ (
+ {
+ 'spam': ['3.1.2', "3.3.2"],
+ 'eggs': ['3.3.2', "1.2.3"],
+ },
+ {
+ "eggs": {
+ "required_version": "3.2",
+ "found_versions": ["3.3", "1.2"],
+ },
+ "spam": {
+ "required_version": "3.2",
+ "found_versions": ["3.1", "3.3"],
+ }
+ }, # not the right version
+ ),
+])
+def test_check_pkg_version_found(pkgs, expect_not_found):
+ if expect_not_found:
+ with pytest.raises(rpm_version.RpmVersionException) as e:
+ rpm_version._check_pkg_versions(pkgs, expected_pkgs)
+
+ assert "found to be installed with an incorrect version" in str(e.value)
+ assert expect_not_found == e.value.problem_pkgs
+ else:
+ rpm_version._check_pkg_versions(pkgs, expected_pkgs)
diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md
index 3c410eff2..0c60ef6fd 100644
--- a/roles/openshift_logging/README.md
+++ b/roles/openshift_logging/README.md
@@ -124,3 +124,34 @@ Elasticsearch OPS too, if using an OPS cluster:
- `openshift_logging_es_ops_ca_ext`: The location of the CA cert for the cert
Elasticsearch uses for the external TLS server cert (default is the internal
CA)
+
+### mux - secure_forward listener service
+- `openshift_logging_use_mux`: Default `False`. If this is `True`, a service
+ called `mux` will be deployed. This service will act as a Fluentd
+ secure_forward forwarder for the node agent Fluentd daemonsets running in the
+ cluster. This can reduce the number of connections to the OpenShift API
+ server by configuring each node Fluentd to send raw logs to `mux` with its
+ k8s metadata plugin turned off.
+- `openshift_logging_mux_allow_external`: Default `False`. If this is `True`,
+ the `mux` service will be deployed, and it will be configured to allow
+ Fluentd clients running outside of the cluster to send logs using
+ secure_forward. This allows OpenShift logging to be used as a central
+ logging service for clients other than OpenShift, or other OpenShift
+ clusters.
+- `openshift_logging_use_mux_client`: Default `False`. If this is `True`, the
+ node agent Fluentd services will be configured to send logs to the mux
+ service rather than directly to Elasticsearch.
+- `openshift_logging_mux_hostname`: Default is "mux." +
+ `openshift_master_default_subdomain`. This is the hostname *external*
+ clients will use to connect to mux, and it will be used in the TLS server
+ cert subject.
+- `openshift_logging_mux_port`: Default `24284`.
+- `openshift_logging_mux_cpu_limit`: Default `500m`.
+- `openshift_logging_mux_memory_limit`: Default `1Gi`.
+- `openshift_logging_mux_default_namespaces`: Default `["mux-undefined"]` - the
+ first value in the list is the namespace to use for undefined projects,
+ followed by any additional namespaces to create by default - users will
+ typically not need to set this
+- `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to
+ create for _external_ mux clients to associate with their logs - users will
+ need to set this
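+
+As a minimal sketch (the hostname and namespace values here are illustrative
+assumptions, not shipped defaults), an inventory enabling `mux` for external
+clients might set:
+
+```
+[OSEv3:vars]
+openshift_logging_use_mux=True
+openshift_logging_mux_allow_external=True
+openshift_logging_mux_hostname=mux.apps.example.com
+openshift_logging_mux_namespaces=["external-app1", "external-app2"]
+```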
diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml
index 837c54067..573cbdd09 100644
--- a/roles/openshift_logging/defaults/main.yml
+++ b/roles/openshift_logging/defaults/main.yml
@@ -160,8 +160,13 @@ openshift_logging_use_mux: "{{ openshift_logging_mux_allow_external | default(Fa
openshift_logging_use_mux_client: False
openshift_logging_mux_hostname: "{{ 'mux.' ~ (openshift_master_default_subdomain | default('router.default.svc.cluster.local', true)) }}"
openshift_logging_mux_port: 24284
-openshift_logging_mux_cpu_limit: 100m
-openshift_logging_mux_memory_limit: 512Mi
+openshift_logging_mux_cpu_limit: 500m
+openshift_logging_mux_memory_limit: 1Gi
+# the namespace to use for undefined projects should come first, followed by any
+# additional namespaces to create by default - users will typically not need to set this
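+# e.g. openshift_logging_mux_default_namespaces: ["mux-undefined", "project-a"] (values illustrative)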
+openshift_logging_mux_default_namespaces: ["mux-undefined"]
+# extra namespaces to create for mux clients - users will need to set this
+openshift_logging_mux_namespaces: []
# following can be uncommented to provide values for configmaps -- take care when providing file contents as it may cause your cluster to not operate correctly
#es_logging_contents:
diff --git a/roles/openshift_logging/tasks/delete_logging.yaml b/roles/openshift_logging/tasks/delete_logging.yaml
index 0c7152b16..6d023a02d 100644
--- a/roles/openshift_logging/tasks/delete_logging.yaml
+++ b/roles/openshift_logging/tasks/delete_logging.yaml
@@ -32,9 +32,8 @@
# delete our old secrets
- name: delete logging secrets
- oc_obj:
+ oc_secret:
state: absent
- kind: secret
namespace: "{{ openshift_logging_namespace }}"
name: "{{ item }}"
with_items:
diff --git a/roles/openshift_logging/tasks/generate_certs.yaml b/roles/openshift_logging/tasks/generate_certs.yaml
index 7169c4036..040356e3d 100644
--- a/roles/openshift_logging/tasks/generate_certs.yaml
+++ b/roles/openshift_logging/tasks/generate_certs.yaml
@@ -124,7 +124,7 @@
- system.logging.mux
loop_control:
loop_var: node_name
- when: openshift_logging_use_mux
+ when: openshift_logging_use_mux | bool
- name: Generate PEM cert for Elasticsearch external route
include: generate_pems.yaml component={{node_name}}
diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml
index dde76b142..7c1062b77 100644
--- a/roles/openshift_logging/tasks/install_logging.yaml
+++ b/roles/openshift_logging/tasks/install_logging.yaml
@@ -10,7 +10,7 @@
name: "{{ openshift_logging_namespace }}"
node_selector: "{{ openshift_logging_nodeselector | default(null) }}"
-- name: Labelling logging project
+- name: Labeling logging project
oc_label:
state: present
kind: namespace
@@ -23,7 +23,7 @@
- openshift_logging_labels is defined
- openshift_logging_labels is dict
-- name: Labelling logging project
+- name: Labeling logging project
oc_label:
state: present
kind: namespace
@@ -78,6 +78,8 @@
- "{{ openshift_logging_facts.elasticsearch.deploymentconfigs }}"
- "{{ openshift_logging_facts.elasticsearch.pvcs }}"
- "{{ es_indices }}"
+ when:
+ - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count > 0
# Create any new DC that may be required
- include_role:
@@ -124,6 +126,7 @@
- "{{ es_ops_indices }}"
when:
- openshift_logging_use_ops | bool
+ - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count > 0
# Create any new DC that may be required
- include_role:
diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2
index 681f5a7e6..58c325c8a 100644
--- a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2
+++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2
@@ -38,6 +38,7 @@ gateway:
io.fabric8.elasticsearch.authentication.users: ["system.logging.kibana", "system.logging.fluentd", "system.logging.curator", "system.admin"]
io.fabric8.elasticsearch.kibana.mapping.app: /usr/share/elasticsearch/index_patterns/com.redhat.viaq-openshift.index-pattern.json
io.fabric8.elasticsearch.kibana.mapping.ops: /usr/share/elasticsearch/index_patterns/com.redhat.viaq-openshift.index-pattern.json
+io.fabric8.elasticsearch.kibana.mapping.empty: /usr/share/elasticsearch/index_patterns/com.redhat.viaq-openshift.index-pattern.json
openshift.config:
use_common_data_model: true
diff --git a/roles/openshift_logging_kibana/tasks/main.yaml b/roles/openshift_logging_kibana/tasks/main.yaml
index 55b28ee24..d13255386 100644
--- a/roles/openshift_logging_kibana/tasks/main.yaml
+++ b/roles/openshift_logging_kibana/tasks/main.yaml
@@ -179,7 +179,7 @@
# path: "{{ generated_certs_dir }}/kibana-internal.key"
#- name: server-cert
# path: "{{ generated_certs_dir }}/kibana-internal.crt"
- #- name: server-tls
+ #- name: server-tls.json
# path: "{{ generated_certs_dir }}/server-tls.json"
contents:
- path: oauth-secret
@@ -190,7 +190,7 @@
data: "{{ key_pairs | entry_from_named_pair('kibana_internal_key') | b64decode }}"
- path: server-cert
data: "{{ key_pairs | entry_from_named_pair('kibana_internal_cert') | b64decode }}"
- - path: server-tls
+ - path: server-tls.json
data: "{{ key_pairs | entry_from_named_pair('server_tls') | b64decode }}"
# create Kibana DC
diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml
index 8aaa28706..10fa4372c 100644
--- a/roles/openshift_logging_mux/defaults/main.yml
+++ b/roles/openshift_logging_mux/defaults/main.yml
@@ -9,8 +9,8 @@ openshift_logging_mux_namespace: logging
### Common settings
openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}"
-openshift_logging_mux_cpu_limit: 100m
-openshift_logging_mux_memory_limit: 512Mi
+openshift_logging_mux_cpu_limit: 500m
+openshift_logging_mux_memory_limit: 1Gi
openshift_logging_mux_replicas: 1
@@ -26,9 +26,14 @@ openshift_logging_mux_use_journal: "{{ openshift_hosted_logging_use_journal | de
openshift_logging_mux_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}"
openshift_logging_mux_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}"
-openshift_logging_mux_allow_external: false
+openshift_logging_mux_allow_external: False
openshift_logging_mux_hostname: "{{ 'mux.' ~ (openshift_master_default_subdomain | default('router.default.svc.cluster.local', true)) }}"
openshift_logging_mux_port: 24284
+# The namespace to use for undefined projects should come first, followed by any
+# additional namespaces to create by default; users will typically not need to set this.
+openshift_logging_mux_default_namespaces: ["mux-undefined"]
+# Extra namespaces to create for mux clients; users will need to set this.
+openshift_logging_mux_namespaces: []
openshift_logging_mux_app_client_cert: /etc/fluent/keys/cert
openshift_logging_mux_app_client_key: /etc/fluent/keys/key
diff --git a/roles/openshift_logging_mux/tasks/main.yaml b/roles/openshift_logging_mux/tasks/main.yaml
index 432cab9e9..54af40070 100644
--- a/roles/openshift_logging_mux/tasks/main.yaml
+++ b/roles/openshift_logging_mux/tasks/main.yaml
@@ -130,16 +130,14 @@
selector:
component: mux
provider: openshift
- # pending #4091
- #labels:
- #- logging-infra: 'support'
+ labels:
+ logging-infra: 'support'
ports:
- name: mux-forward
port: "{{ openshift_logging_mux_port }}"
targetPort: "mux-forward"
- # pending #4091
- # externalIPs:
- # - "{{ ansible_eth0.ipv4.address }}"
+ external_ips:
+ - "{{ ansible_eth0.ipv4.address }}"
when: openshift_logging_mux_allow_external | bool
- name: Set logging-mux service for internal communication
@@ -150,9 +148,8 @@
selector:
component: mux
provider: openshift
- # pending #4091
- #labels:
- #- logging-infra: 'support'
+ labels:
+ logging-infra: 'support'
ports:
- name: mux-forward
port: "{{ openshift_logging_mux_port }}"
@@ -190,6 +187,13 @@
- "{{ tempdir }}/templates/logging-mux-dc.yaml"
delete_after: true
+- name: Add mux namespaces
+ oc_project:
+ state: present
+ name: "{{ item }}"
+ node_selector: ""
+ with_items: "{{ openshift_logging_mux_namespaces | union(openshift_logging_mux_default_namespaces) }}"
+
- name: Delete temp directory
file:
name: "{{ tempdir }}"
diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2
index 770a2bfbd..502cd3347 100644
--- a/roles/openshift_logging_mux/templates/mux.j2
+++ b/roles/openshift_logging_mux/templates/mux.j2
@@ -109,7 +109,7 @@ spec:
name: logging-mux
- name: certs
secret:
- secretName: logging-mux
+ secretName: logging-fluentd
- name: dockerhostname
hostPath:
path: /etc/hostname
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index 5522fef26..aed5598c0 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -177,6 +177,7 @@
# https://github.com/openshift/origin/issues/6447
- name: Start and enable master
systemd:
+ daemon_reload: yes
name: "{{ openshift.common.service_type }}-master"
enabled: yes
state: started
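With `daemon_reload: yes` the systemd module issues the equivalent of `systemctl daemon-reload` before enabling and starting the unit, so a freshly templated master unit file is picked up. Roughly (service name shown for service_type "origin"; an example, not from the patch):

    systemctl daemon-reload
    systemctl enable origin-master
    systemctl start origin-master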
diff --git a/roles/openshift_master_certificates/tasks/main.yml b/roles/openshift_master_certificates/tasks/main.yml
index 9706da24b..62413536b 100644
--- a/roles/openshift_master_certificates/tasks/main.yml
+++ b/roles/openshift_master_certificates/tasks/main.yml
@@ -71,7 +71,7 @@
delegate_to: "{{ openshift_ca_host }}"
run_once: true
-- name: Generate the master client config
+- name: Generate the loopback master client config
command: >
{{ hostvars[openshift_ca_host].openshift.common.client_binary }} adm create-api-client-config
{% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %}
@@ -80,8 +80,8 @@
--certificate-authority={{ openshift_ca_cert }}
--client-dir={{ openshift_generated_configs_dir }}/master-{{ hostvars[item].openshift.common.hostname }}
--groups=system:masters,system:openshift-master
- --master={{ openshift.master.api_url }}
- --public-master={{ openshift.master.public_api_url }}
+ --master={{ hostvars[item].openshift.master.loopback_api_url }}
+ --public-master={{ hostvars[item].openshift.master.loopback_api_url }}
--signer-cert={{ openshift_ca_cert }}
--signer-key={{ openshift_ca_key }}
--signer-serial={{ openshift_ca_serial }}
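Each master's generated client config now points at that master's own loopback API URL rather than the shared API URL, so the kubeconfig keeps working even if the public hostname or load balancer is unreachable. In effect (hostnames hypothetical):

    # before: every master's client config pointed at the cluster URL
    #   server: https://openshift.example.com:8443
    # after: each master talks to itself via loopback_api_url
    #   server: https://master-1.example.com:8443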
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index a8beaa060..573051504 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -206,6 +206,7 @@
- name: Start and enable node dep
systemd:
+ daemon_reload: yes
name: "{{ openshift.common.service_type }}-node-dep"
enabled: yes
state: started
diff --git a/roles/openshift_node_upgrade/tasks/restart.yml b/roles/openshift_node_upgrade/tasks/restart.yml
index e576228ba..508eb9358 100644
--- a/roles/openshift_node_upgrade/tasks/restart.yml
+++ b/roles/openshift_node_upgrade/tasks/restart.yml
@@ -5,6 +5,14 @@
# - openshift.common.hostname
# - openshift.master.api_port
+# NOTE: This is needed to make sure we are using the correct set
+# of systemd unit files. The RPMs lay down defaults but
+# the install/upgrade may override them in /etc/systemd/system/.
+# NOTE: We don't use the systemd module as some versions of the module
+# require a service to be part of the call.
+- name: Reload systemd to ensure latest unit files
+ command: systemctl daemon-reload
+
- name: Restart docker
service:
name: "{{ openshift.docker.service_name }}"
diff --git a/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml
index 480e87d58..06a2d16ba 100644
--- a/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml
+++ b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml
@@ -12,3 +12,18 @@
- name: Ensure python-yaml present for config upgrade
package: name=PyYAML state=present
when: not openshift.common.is_atomic | bool
+
+- name: Install Node service file
+ template:
+ dest: "/etc/systemd/system/{{ openshift.common.service_type }}-node.service"
+ src: "{{ openshift.common.service_type }}-node.service.j2"
+ register: l_node_unit
+
+# NOTE: This is needed to make sure we are using the correct set
+# of systemd unit files. The RPMs lay down defaults but
+# the install/upgrade may override them in /etc/systemd/system/.
+# NOTE: We don't use the systemd module as some versions of the module
+# require a service to be part of the call.
+- name: Reload systemd units
+ command: systemctl daemon-reload
+ when: l_node_unit | changed
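`l_node_unit | changed` is the older filter-style test on a registered result; later Ansible releases prefer the equivalent test syntax:

    # as written above (works on the Ansible versions targeted here):
    when: l_node_unit | changed
    # test form preferred by later Ansible releases:
    when: l_node_unit is changed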
diff --git a/roles/openshift_node_upgrade/templates/atomic-openshift-node.service.j2 b/roles/openshift_node_upgrade/templates/atomic-openshift-node.service.j2
new file mode 120000
index 000000000..6041fb13a
--- /dev/null
+++ b/roles/openshift_node_upgrade/templates/atomic-openshift-node.service.j2
@@ -0,0 +1 @@
+../../openshift_node/templates/atomic-openshift-node.service.j2 \ No newline at end of file
diff --git a/roles/openshift_node_upgrade/templates/origin-node.service.j2 b/roles/openshift_node_upgrade/templates/origin-node.service.j2
new file mode 120000
index 000000000..79c45a303
--- /dev/null
+++ b/roles/openshift_node_upgrade/templates/origin-node.service.j2
@@ -0,0 +1 @@
+../../openshift_node/templates/origin-node.service.j2 \ No newline at end of file