diff options
Diffstat (limited to 'roles')
19 files changed, 451 insertions, 13 deletions
diff --git a/roles/etcd/defaults/main.yaml b/roles/etcd/defaults/main.yaml index e45f53219..c0d1d5946 100644 --- a/roles/etcd/defaults/main.yaml +++ b/roles/etcd/defaults/main.yaml @@ -1,10 +1,4 @@ --- -etcd_service: "{{ 'etcd' if openshift.common.is_etcd_system_container | bool or not etcd_is_containerized | bool else 'etcd_container' }}" -etcd_client_port: 2379 -etcd_peer_port: 2380 -etcd_url_scheme: http -etcd_peer_url_scheme: http - etcd_initial_cluster_state: new etcd_initial_cluster_token: etcd-cluster-1 diff --git a/roles/etcd_common/defaults/main.yml b/roles/etcd_common/defaults/main.yml index d12e6a07f..e1a080b34 100644 --- a/roles/etcd_common/defaults/main.yml +++ b/roles/etcd_common/defaults/main.yml @@ -1,6 +1,9 @@ --- +# runc, docker, host +r_etcd_common_etcd_runtime: "docker" + # etcd server vars -etcd_conf_dir: "{{ '/etc/etcd' if not openshift.common.is_etcd_system_container else '/var/lib/etcd/etcd.etcd/etc' }}" +etcd_conf_dir: "{{ '/etc/etcd' if r_etcd_common_etcd_runtime != 'runc' else '/var/lib/etcd/etcd.etcd/etc' }}" etcd_system_container_conf_dir: /var/lib/etcd/etc etcd_conf_file: "{{ etcd_conf_dir }}/etcd.conf" etcd_ca_file: "{{ etcd_conf_dir }}/ca.crt" @@ -38,3 +41,9 @@ etcd_is_thirdparty: False # etcd dir vars etcd_data_dir: /var/lib/etcd/ + +# etcd ports and protocols +etcd_client_port: 2379 +etcd_peer_port: 2380 +etcd_url_scheme: http +etcd_peer_url_scheme: http diff --git a/roles/etcd_common/vars/main.yml b/roles/etcd_common/vars/main.yml new file mode 100644 index 000000000..00d697776 --- /dev/null +++ b/roles/etcd_common/vars/main.yml @@ -0,0 +1,4 @@ +--- +etcd_service: "{{ 'etcd_container' if r_etcd_common_etcd_runtime == 'docker' else 'etcd' }}" +# Location of the service file is fixed and not meant to be changed +etcd_service_file: "/etc/systemd/system/{{ etcd_service }}.service" diff --git a/roles/etcd_upgrade/defaults/main.yml b/roles/etcd_upgrade/defaults/main.yml new file mode 100644 index 000000000..01ad8a268 --- /dev/null +++ b/roles/etcd_upgrade/defaults/main.yml @@ -0,0 +1,9 @@ +--- +r_etcd_upgrade_action: upgrade +r_etcd_upgrade_mechanism: rpm +r_etcd_upgrade_embedded_etcd: False + +# etcd run on a host => use etcdctl command directly +# etcd run as a docker container => use docker exec +# etcd run as a runc container => use runc exec +etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' or r_etcd_upgrade_embedded_etcd | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}" diff --git a/roles/etcd_upgrade/meta/main.yml b/roles/etcd_upgrade/meta/main.yml new file mode 100644 index 000000000..018bdc8d7 --- /dev/null +++ b/roles/etcd_upgrade/meta/main.yml @@ -0,0 +1,16 @@ +--- +galaxy_info: + author: Jan Chaloupka + description: + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 1.9 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- role: etcd_common diff --git a/roles/etcd_upgrade/tasks/backup.yml b/roles/etcd_upgrade/tasks/backup.yml new file mode 100644 index 000000000..1ea6fc59f --- /dev/null +++ b/roles/etcd_upgrade/tasks/backup.yml @@ -0,0 +1,71 @@ +--- +# INPUT r_etcd_backup_sufix_name +# INPUT r_etcd_backup_tag +# OUTPUT r_etcd_upgrade_backup_complete +- set_fact: + # ORIGIN etcd_data_dir etcd_common.defaults + l_etcd_backup_dir: "{{ etcd_data_dir }}/openshift-backup-{{ r_etcd_backup_tag | default('') }}{{ r_etcd_backup_sufix_name }}" + +# TODO: replace shell module with command and update later checks +- name: Check available disk space for etcd backup + shell: df --output=avail -k {{ etcd_data_dir }} | tail -n 1 + register: avail_disk + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + +# TODO: replace shell module with command and update later checks +- name: Check current etcd disk usage + shell: du --exclude='*openshift-backup*' -k {{ etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: r_etcd_upgrade_embedded_etcd | bool + # AUDIT:changed_when: `false` because we are only inspecting + # state, not manipulating anything + changed_when: false + +- name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (r_etcd_upgrade_embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + +# For non containerized and non embedded we should have the correct version of +# etcd installed already. So don't do anything. +# +# For containerized installs we now exec into etcd_container +# +# For embedded non containerized we need to ensure we have the latest version +# etcd on the host. +- name: Install latest etcd for embedded + package: + name: etcd + state: latest + when: + - r_etcd_upgrade_embedded_etcd | bool + - not l_ostree_booted.stat.exists | bool + +- name: Generate etcd backup + command: > + {{ etcdctl_command }} backup --data-dir={{ etcd_data_dir }} + --backup-dir={{ l_etcd_backup_dir }} + +# According to the docs change you can simply copy snap/db +# https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6 +- name: Check for v3 data store + stat: + path: "{{ etcd_data_dir }}/member/snap/db" + register: v3_db + +- name: Copy etcd v3 data store + command: > + cp -a {{ etcd_data_dir }}/member/snap/db + {{ l_etcd_backup_dir }}/member/snap/ + when: v3_db.stat.exists + +- set_fact: + r_etcd_upgrade_backup_complete: True + +- name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ l_etcd_backup_dir }}" diff --git a/roles/etcd_upgrade/tasks/main.yml b/roles/etcd_upgrade/tasks/main.yml new file mode 100644 index 000000000..5178c14e3 --- /dev/null +++ b/roles/etcd_upgrade/tasks/main.yml @@ -0,0 +1,14 @@ +--- +# INPUT r_etcd_upgrade_action +- name: Fail if invalid etcd_upgrade_action provided + fail: + msg: "etcd_upgrade role can only be called with 'upgrade' or 'backup'" + when: + - r_etcd_upgrade_action not in ['upgrade', 'backup'] + +- name: Detecting Atomic Host Operating System + stat: + path: /run/ostree-booted + register: l_ostree_booted + +- include: "{{ r_etcd_upgrade_action }}.yml" diff --git a/roles/etcd_upgrade/tasks/upgrade.yml b/roles/etcd_upgrade/tasks/upgrade.yml new file mode 100644 index 000000000..420c9638e --- /dev/null +++ b/roles/etcd_upgrade/tasks/upgrade.yml @@ -0,0 +1,11 @@ +--- +# INPUT r_etcd_upgrade_version +# INPUT r_etcd_upgrade_mechanism +- name: Failt if r_etcd_upgrade_mechanism is not set during upgrade + fail: + msg: "r_etcd_upgrade_mechanism can be only set to 'rpm' or 'image'" + when: + - r_etcd_upgrade_mechanism not in ['rpm', 'image'] + +- name: "Upgrade {{ r_etcd_upgrade_mechanism }} based etcd" + include: upgrade_{{ r_etcd_upgrade_mechanism }}.yml diff --git a/roles/etcd_upgrade/tasks/upgrade_image.yml b/roles/etcd_upgrade/tasks/upgrade_image.yml new file mode 100644 index 000000000..136ec1142 --- /dev/null +++ b/roles/etcd_upgrade/tasks/upgrade_image.yml @@ -0,0 +1,48 @@ +--- +# INPUT r_etcd_upgrade_version +- name: Verify cluster is healthy pre-upgrade + command: "{{ etcdctlv2 }} cluster-health" + +- name: Get current image + shell: "grep 'ExecStart=' {{ etcd_service_file }} | awk '{print $NF}'" + register: current_image + +- name: Set new_etcd_image + set_fact: + new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd:' ~ r_etcd_upgrade_version ) }}" + +- name: Pull new etcd image + command: "docker pull {{ new_etcd_image }}" + +- name: Update to latest etcd image + replace: + dest: "{{ etcd_service_file }}" + regexp: "{{ current_image.stdout }}$" + replace: "{{ new_etcd_image }}" + +- name: Restart etcd_container + systemd: + name: "{{ etcd_service }}" + daemon_reload: yes + state: restarted + +## TODO: probably should just move this into the backup playbooks, also this +## will fail on atomic host. We need to revisit how to do etcd backups there as +## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) +- name: Upgrade etcd for etcdctl when not atomic + package: name=etcd state=latest + when: not l_ostree_booted.stat.exists | bool + +- name: Verify cluster is healthy + command: "{{ etcdctlv2 }} cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 + +- name: Store new etcd_image + # DEPENDENCY openshift_facts + openshift_facts: + role: etcd + local_facts: + etcd_image: "{{ new_etcd_image }}" diff --git a/roles/etcd_upgrade/tasks/upgrade_rpm.yml b/roles/etcd_upgrade/tasks/upgrade_rpm.yml new file mode 100644 index 000000000..324b69605 --- /dev/null +++ b/roles/etcd_upgrade/tasks/upgrade_rpm.yml @@ -0,0 +1,32 @@ +--- +# INPUT r_etcd_upgrade_version? + +# F23 GA'd with etcd 2.0, currently has 2.2 in updates +# F24 GA'd with etcd-2.2, currently has 2.2 in updates +# F25 Beta currently has etcd 3.0 +# RHEL 7.3.4 with etcd-3.1.3-1.el7 +# RHEL 7.3.3 with etcd-3.1.0-2.el7 +# RHEL 7.3.2 with etcd-3.0.15-1.el7 + +- name: Verify cluster is healthy pre-upgrade + command: "{{ etcdctlv2 }} cluster-health" + +- set_fact: + l_etcd_target_package: "{{ 'etcd' if r_etcd_upgrade_version is not defined else 'etcd-'+r_etcd_upgrade_version+'*' }}" + +- name: Update etcd RPM to {{ l_etcd_target_package }} + package: + name: "{{ l_etcd_target_package }}" + state: latest + +- name: Restart etcd + service: + name: "{{ etcd_service }}" + state: restarted + +- name: Verify cluster is healthy + command: "{{ etcdctlv2 }} cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/roles/etcd_upgrade/vars/main.yml b/roles/etcd_upgrade/vars/main.yml new file mode 100644 index 000000000..5ed919d42 --- /dev/null +++ b/roles/etcd_upgrade/vars/main.yml @@ -0,0 +1,3 @@ +--- +# EXPECTS etcd_peer +etcdctlv2: "etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} -C https://{{ etcd_peer }}:{{ etcd_client_port }}" diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index f657d86cf..1b9bda67e 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -15,6 +15,9 @@ l_is_etcd_system_container: "{{ (use_etcd_system_container | default(use_system_containers) | bool) }}" - set_fact: l_any_system_container: "{{ l_is_etcd_system_container or l_is_openvswitch_system_container or l_is_node_system_container or l_is_master_system_container }}" +- set_fact: + l_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else 'docker' if l_is_containerized else 'host' }}" + - name: Validate python version fail: @@ -80,6 +83,7 @@ is_node_system_container: "{{ l_is_node_system_container | default(false) }}" is_master_system_container: "{{ l_is_master_system_container | default(false) }}" is_etcd_system_container: "{{ l_is_etcd_system_container | default(false) }}" + etcd_runtime: "{{ l_etcd_runtime }}" system_images_registry: "{{ system_images_registry | default('') }}" public_hostname: "{{ openshift_public_hostname | default(None) }}" public_ip: "{{ openshift_public_ip | default(None) }}" diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py new file mode 100644 index 000000000..7452c9cc1 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -0,0 +1,58 @@ +"""A health check for OpenShift clusters.""" + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var + + +class EtcdVolume(OpenShiftCheck): + """Ensures etcd storage usage does not exceed a given threshold.""" + + name = "etcd_volume" + tags = ["etcd", "health"] + + # Default device usage threshold. Value should be in the range [0, 100]. + default_threshold_percent = 90 + # Where to find ectd data, higher priority first. + supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] + + @classmethod + def is_active(cls, task_vars): + etcd_hosts = get_var(task_vars, "groups", "etcd", default=[]) or get_var(task_vars, "groups", "masters", + default=[]) or [] + is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts + return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host + + def run(self, tmp, task_vars): + mount_info = self._etcd_mount_info(task_vars) + available = mount_info["size_available"] + total = mount_info["size_total"] + used = total - available + + threshold = get_var( + task_vars, + "etcd_device_usage_threshold_percent", + default=self.default_threshold_percent + ) + + used_percent = 100.0 * used / total + + if used_percent > threshold: + device = mount_info.get("device", "unknown") + mount = mount_info.get("mount", "unknown") + msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format( + used_percent, threshold, device, mount + ) + return {"failed": True, "msg": msg} + + return {"changed": False} + + def _etcd_mount_info(self, task_vars): + ansible_mounts = get_var(task_vars, "ansible_mounts") + mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + + for path in self.supported_mount_paths: + if path in mounts: + return mounts[path] + + paths = ', '.join(sorted(mounts)) or 'none' + msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths) + raise OpenShiftCheckException(msg) diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py new file mode 100644 index 000000000..917045526 --- /dev/null +++ b/roles/openshift_health_checker/test/etcd_volume_test.py @@ -0,0 +1,149 @@ +import pytest + +from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException + + +@pytest.mark.parametrize('ansible_mounts,extra_words', [ + ([], ['none']), # empty ansible_mounts + ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths +]) +def test_cannot_determine_available_disk(ansible_mounts, extra_words): + task_vars = dict( + ansible_mounts=ansible_mounts, + ) + check = EtcdVolume(execute_module=fake_execute_module) + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.run(tmp=None, task_vars=task_vars) + + for word in 'Unable to find etcd storage mount point'.split() + extra_words: + assert word in str(excinfo.value) + + +@pytest.mark.parametrize('size_limit,ansible_mounts', [ + ( + # if no size limit is specified, expect max usage + # limit to default to 90% of size_total + None, + [{ + 'mount': '/', + 'size_available': 40 * 10**9, + 'size_total': 80 * 10**9 + }], + ), + ( + 1, + [{ + 'mount': '/', + 'size_available': 30 * 10**9, + 'size_total': 30 * 10**9, + }], + ), + ( + 20000000000, + [{ + 'mount': '/', + 'size_available': 20 * 10**9, + 'size_total': 40 * 10**9, + }], + ), + ( + 5000000000, + [{ + # not enough space on / ... + 'mount': '/', + 'size_available': 0, + 'size_total': 0, + }, { + # not enough space on /var/lib ... + 'mount': '/var/lib', + 'size_available': 2 * 10**9, + 'size_total': 21 * 10**9, + }, { + # ... but enough on /var/lib/etcd + 'mount': '/var/lib/etcd', + 'size_available': 36 * 10**9, + 'size_total': 40 * 10**9 + }], + ) +]) +def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts): + task_vars = dict( + etcd_device_usage_threshold_percent=size_limit, + ansible_mounts=ansible_mounts, + ) + + if task_vars["etcd_device_usage_threshold_percent"] is None: + task_vars.pop("etcd_device_usage_threshold_percent") + + check = EtcdVolume(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert not result.get('failed', False) + + +@pytest.mark.parametrize('size_limit_percent,ansible_mounts,extra_words', [ + ( + # if no size limit is specified, expect max usage + # limit to default to 90% of size_total + None, + [{ + 'mount': '/', + 'size_available': 1 * 10**9, + 'size_total': 100 * 10**9, + }], + ['99.0%'], + ), + ( + 70.0, + [{ + 'mount': '/', + 'size_available': 1 * 10**6, + 'size_total': 5 * 10**9, + }], + ['100.0%'], + ), + ( + 40.0, + [{ + 'mount': '/', + 'size_available': 2 * 10**9, + 'size_total': 6 * 10**9, + }], + ['66.7%'], + ), + ( + None, + [{ + # enough space on /var ... + 'mount': '/var', + 'size_available': 20 * 10**9, + 'size_total': 20 * 10**9, + }, { + # .. but not enough on /var/lib + 'mount': '/var/lib', + 'size_available': 1 * 10**9, + 'size_total': 20 * 10**9, + }], + ['95.0%'], + ), +]) +def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, extra_words): + task_vars = dict( + etcd_device_usage_threshold_percent=size_limit_percent, + ansible_mounts=ansible_mounts, + ) + + if task_vars["etcd_device_usage_threshold_percent"] is None: + task_vars.pop("etcd_device_usage_threshold_percent") + + check = EtcdVolume(execute_module=fake_execute_module) + result = check.run(tmp=None, task_vars=task_vars) + + assert result['failed'] + for word in extra_words: + assert word in result['msg'] + + +def fake_execute_module(*args): + raise AssertionError('this function should not be called') diff --git a/roles/openshift_hosted/tasks/registry/storage/s3.yml b/roles/openshift_hosted/tasks/registry/storage/s3.yml index 26f921f15..318969885 100644 --- a/roles/openshift_hosted/tasks/registry/storage/s3.yml +++ b/roles/openshift_hosted/tasks/registry/storage/s3.yml @@ -2,14 +2,10 @@ - name: Assert that S3 variables are provided for registry_config template assert: that: - - openshift.hosted.registry.storage.s3.accesskey | default(none) is not none - - openshift.hosted.registry.storage.s3.secretkey | default(none) is not none - openshift.hosted.registry.storage.s3.bucket | default(none) is not none - openshift.hosted.registry.storage.s3.region | default(none) is not none msg: | When using S3 storage, the following variables are required: - openshift_hosted_registry_storage_s3_accesskey - openshift_hosted_registry_storage_s3_secretkey openshift_hosted_registry_storage_s3_bucket openshift_hosted_registry_storage_s3_region diff --git a/roles/openshift_hosted/templates/registry_config.j2 b/roles/openshift_hosted/templates/registry_config.j2 index ca6a23f21..dc8a9f089 100644 --- a/roles/openshift_hosted/templates/registry_config.j2 +++ b/roles/openshift_hosted/templates/registry_config.j2 @@ -10,8 +10,12 @@ storage: blobdescriptor: inmemory {% if openshift_hosted_registry_storage_provider | default('') == 's3' %} s3: +{% if openshift_hosted_registry_storage_s3_accesskey is defined %} accesskey: {{ openshift_hosted_registry_storage_s3_accesskey }} +{% endif %} +{% if openshift_hosted_registry_storage_s3_secretkey is defined %} secretkey: {{ openshift_hosted_registry_storage_s3_secretkey }} +{% endif %} region: {{ openshift_hosted_registry_storage_s3_region }} {% if openshift_hosted_registry_storage_s3_regionendpoint is defined %} regionendpoint: {{ openshift_hosted_registry_storage_s3_regionendpoint }} diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index 938ac2a12..1935d9592 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -44,10 +44,10 @@ assetConfig: - {{ cipher_suite }} {% endfor %} {% endif %} -{% if openshift_master_ha | bool %} {% if openshift.master.audit_config | default(none) is not none and openshift.common.version_gte_3_2_or_1_2 | bool %} auditConfig:{{ openshift.master.audit_config | to_padded_yaml(level=1) }} {% endif %} +{% if openshift_master_ha | bool %} controllerLeaseTTL: {{ openshift.master.controller_lease_ttl | default('30') }} {% endif %} {% if openshift.common.version_gte_3_3_or_1_3 | bool %} @@ -274,5 +274,12 @@ servingInfo: - {{ cipher_suite }} {% endfor %} {% endif %} +{% if openshift_template_service_broker_namespaces is defined %} +templateServiceBrokerConfig: + templateNamespaces: +{% for namespace in openshift_template_service_broker_namespaces %} + - {{ namespace }} +{% endfor %} +{% endif %} volumeConfig: dynamicProvisioningEnabled: {{ openshift.master.dynamic_provisioning_enabled }} diff --git a/roles/openshift_master_facts/filter_plugins/openshift_master.py b/roles/openshift_master_facts/filter_plugins/openshift_master.py index b5be193d2..e767772ce 100644 --- a/roles/openshift_master_facts/filter_plugins/openshift_master.py +++ b/roles/openshift_master_facts/filter_plugins/openshift_master.py @@ -468,7 +468,8 @@ class GitHubIdentityProvider(IdentityProviderOauthBase): """ def __init__(self, api_version, idp): IdentityProviderOauthBase.__init__(self, api_version, idp) - self._optional += [['organizations']] + self._optional += [['organizations'], + ['teams']] class FilterModule(object): diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 4ca5e6138..9af10a849 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -1,4 +1,12 @@ --- +- local_action: shell rpm -q python-passlib || echo not installed + register: passlib_result + +- name: Check that python-passlib is available on the control host + assert: + that: + - "'not installed' not in passlib_result.stdout" + msg: "python-passlib rpm must be installed on control host" - name: Set default image variables based on deployment_type include_vars: "{{ item }}" |