Diffstat (limited to 'roles')
122 files changed, 3326 insertions, 494 deletions
diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml index bddad778f..57f49ea11 100644 --- a/roles/cockpit/tasks/main.yml +++ b/roles/cockpit/tasks/main.yml @@ -3,7 +3,7 @@ package: name={{ item }} state=present with_items: - cockpit-ws - - cockpit-shell + - cockpit-system - cockpit-bridge - cockpit-docker - "{{ cockpit_plugins }}" diff --git a/roles/contiv/defaults/main.yml b/roles/contiv/defaults/main.yml index 1ccae61f2..8c4d19537 100644 --- a/roles/contiv/defaults/main.yml +++ b/roles/contiv/defaults/main.yml @@ -1,12 +1,12 @@ --- # The version of Contiv binaries to use -contiv_version: 1.0.0-beta.3-02-21-2017.20-52-42.UTC +contiv_version: 1.0.1 # The version of cni binaries cni_version: v0.4.0 -contiv_default_subnet: "20.1.1.1/24" -contiv_default_gw: "20.1.1.254" +contiv_default_subnet: "10.128.0.0/16" +contiv_default_gw: "10.128.254.254" # TCP port that Netmaster listens for network connections netmaster_port: 9999 @@ -69,6 +69,9 @@ netplugin_fwd_mode: bridge # Contiv fabric mode aci|default contiv_fabric_mode: default +# Global VLAN range +contiv_vlan_range: "2900-3000" + # Encapsulation type vlan|vxlan to use for instantiating container networks contiv_encap_mode: vlan @@ -78,8 +81,8 @@ netplugin_driver: ovs # Create a default Contiv network for use by pods contiv_default_network: true -# VLAN/ VXLAN tag value to be used for the default network -contiv_default_network_tag: 1 +# Statically configured tag for default network (if needed) +contiv_default_network_tag: "" #SRFIXME (use the openshift variables) https_proxy: "" @@ -95,6 +98,9 @@ apic_leaf_nodes: "" apic_phys_dom: "" apic_contracts_unrestricted_mode: no apic_epg_bridge_domain: not_specified +apic_configure_default_policy: false +apic_default_external_contract: "uni/tn-common/brc-default" +apic_default_app_profile: "contiv-infra-app-profile" is_atomic: False kube_cert_dir: "/data/src/github.com/openshift/origin/openshift.local.config/master" master_name: "{{ groups['masters'][0] }}" @@ -104,3 +110,12 @@ kube_ca_cert: "{{ kube_cert_dir }}/ca.crt" kube_key: "{{ kube_cert_dir }}/admin.key" kube_cert: "{{ kube_cert_dir }}/admin.crt" kube_master_api_port: 8443 + +# contivh1 default subnet and gateway +#contiv_h1_subnet_default: "132.1.1.0/24" +#contiv_h1_gw_default: "132.1.1.1" +contiv_h1_subnet_default: "10.129.0.0/16" +contiv_h1_gw_default: "10.129.0.1" + +# contiv default private subnet for ext access +contiv_private_ext_subnet: "10.130.0.0/16" diff --git a/roles/contiv/meta/main.yml b/roles/contiv/meta/main.yml index 3223afb6e..da6409f1e 100644 --- a/roles/contiv/meta/main.yml +++ b/roles/contiv/meta/main.yml @@ -26,3 +26,5 @@ dependencies: etcd_url_scheme: http etcd_peer_url_scheme: http when: contiv_role == "netmaster" +- role: contiv_auth_proxy + when: (contiv_role == "netmaster") and (contiv_enable_auth_proxy == true) diff --git a/roles/contiv/tasks/default_network.yml b/roles/contiv/tasks/default_network.yml index 9cf98bb80..f679443e0 100644 --- a/roles/contiv/tasks/default_network.yml +++ b/roles/contiv/tasks/default_network.yml @@ -6,10 +6,53 @@ retries: 9 delay: 10 +- name: Contiv | Set globals + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set --fabric-mode {{ contiv_fabric_mode }} --vlan-range {{ contiv_vlan_range }} --fwd-mode {{ netplugin_fwd_mode }} --private-subnet {{ contiv_private_ext_subnet }}' + +- name: Contiv | Set arp mode to flood if ACI + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set 
--arp-mode flood' + when: contiv_fabric_mode == "aci" + - name: Contiv | Check if default-net exists command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net ls' register: net_result - name: Contiv | Create default-net - command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway={{ contiv_default_gw }} default-net' + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway {{ contiv_default_gw }} default-net' when: net_result.stdout.find("default-net") == -1 + +- name: Contiv | Create host access infra network for VxLan routing case + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_h1_subnet_default }} --gateway={{ contiv_h1_gw_default }} --nw-type="infra" contivh1' + when: (contiv_encap_mode == "vxlan") and (netplugin_fwd_mode == "routing") + +#- name: Contiv | Create an allow-all policy for the default-group +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy create ose-allow-all-policy' +# when: contiv_fabric_mode == "aci" + +- name: Contiv | Set up aci external contract to consume default external contract + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -c -a {{ apic_default_external_contract }} oseExtToConsume' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Set up aci external contract to provide default external contract + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -p -a {{ apic_default_external_contract }} oseExtToProvide' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Create aci default-group + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create default-net default-group' + when: contiv_fabric_mode == "aci" + +- name: Contiv | Add external contracts to the default-group + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create -e oseExtToConsume -e oseExtToProvide default-net default-group' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +#- name: Contiv | Add policy rule 1 for allow-all policy +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d in --action allow ose-allow-all-policy 1' +# when: contiv_fabric_mode == "aci" + +#- name: Contiv | Add policy rule 2 for allow-all policy +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d out --action allow ose-allow-all-policy 2' +# when: contiv_fabric_mode == "aci" + +- name: Contiv | Create default aci app profile + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" app-profile create -g default-group {{ apic_default_app_profile }}' + when: contiv_fabric_mode == "aci" diff --git a/roles/contiv/tasks/netmaster.yml b/roles/contiv/tasks/netmaster.yml index 5057767b8..acaf7386e 100644 --- a/roles/contiv/tasks/netmaster.yml +++ b/roles/contiv/tasks/netmaster.yml @@ -23,7 +23,7 @@ line: "{{ hostvars[item]['ansible_' + 
netmaster_interface].ipv4.address }} netmaster" state: present when: hostvars[item]['ansible_' + netmaster_interface].ipv4.address is defined - with_items: groups['masters'] + with_items: "{{ groups['masters'] }}" - name: Netmaster | Create netmaster symlinks file: diff --git a/roles/contiv/tasks/netplugin_iptables.yml b/roles/contiv/tasks/netplugin_iptables.yml index 8c348ac67..184c595c5 100644 --- a/roles/contiv/tasks/netplugin_iptables.yml +++ b/roles/contiv/tasks/netplugin_iptables.yml @@ -23,7 +23,36 @@ notify: Save iptables rules - name: Netplugin IPtables | Open vxlan port with iptables - command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "vxlan" + command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "netplugin vxlan 8472" + when: iptablesrules.stdout.find("netplugin vxlan 8472") == -1 + notify: Save iptables rules - name: Netplugin IPtables | Open vxlan port with iptables - command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment "vxlan" + command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment "netplugin vxlan 4789" + when: iptablesrules.stdout.find("netplugin vxlan 4789") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh0 + command: /sbin/iptables -I FORWARD 1 -i contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD input" + when: iptablesrules.stdout.find("contivh0 FORWARD input") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh0 + command: /sbin/iptables -I FORWARD 1 -o contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD output" + when: iptablesrules.stdout.find("contivh0 FORWARD output") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh1 + command: /sbin/iptables -I FORWARD 1 -i contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD input" + when: iptablesrules.stdout.find("contivh1 FORWARD input") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh1 + command: /sbin/iptables -I FORWARD 1 -o contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD output" + when: iptablesrules.stdout.find("contivh1 FORWARD output") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow dns + command: /sbin/iptables -I INPUT 1 -p udp --dport 53 -j ACCEPT -m comment --comment "contiv dns" + when: iptablesrules.stdout.find("contiv dns") == -1 + notify: Save iptables rules diff --git a/roles/contiv/tasks/packageManagerInstall.yml b/roles/contiv/tasks/packageManagerInstall.yml index 2eff1b85f..e0d48e643 100644 --- a/roles/contiv/tasks/packageManagerInstall.yml +++ b/roles/contiv/tasks/packageManagerInstall.yml @@ -4,9 +4,10 @@ did_install: false - include: pkgMgrInstallers/centos-install.yml - when: ansible_distribution == "CentOS" and not is_atomic + when: (ansible_os_family == "RedHat") and + not is_atomic - name: Package Manager | Set fact saying we did CentOS package install set_fact: did_install: true - when: ansible_distribution == "CentOS" + when: (ansible_os_family == "RedHat") diff --git a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml index 51c3d35ac..91e6aadf3 100644 --- a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml +++ b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml @@ -1,13 +1,13 @@ --- -- name: PkgMgr CentOS | Install net-tools pkg for route +- name: PkgMgr RHEL/CentOS | Install net-tools pkg for route 
yum: pkg=net-tools state=latest -- name: PkgMgr CentOS | Get openstack kilo rpm +- name: PkgMgr RHEL/CentOS | Get openstack ocata rpm get_url: - url: https://repos.fedorapeople.org/repos/openstack/openstack-kilo/rdo-release-kilo-2.noarch.rpm - dest: /tmp/rdo-release-kilo-2.noarch.rpm + url: https://repos.fedorapeople.org/repos/openstack/openstack-ocata/rdo-release-ocata-2.noarch.rpm + dest: /tmp/rdo-release-ocata-2.noarch.rpm validate_certs: False environment: http_proxy: "{{ http_proxy|default('') }}" @@ -16,15 +16,15 @@ tags: - ovs_install -- name: PkgMgr CentOS | Install openstack kilo rpm - yum: name=/tmp/rdo-release-kilo-2.noarch.rpm state=present +- name: PkgMgr RHEL/CentOS | Install openstack ocata rpm + yum: name=/tmp/rdo-release-ocata-2.noarch.rpm state=present tags: - ovs_install -- name: PkgMgr CentOS | Install ovs +- name: PkgMgr RHEL/CentOS | Install ovs yum: - pkg=openvswitch - state=latest + pkg=openvswitch-2.5.0-2.el7.x86_64 + state=present environment: http_proxy: "{{ http_proxy|default('') }}" https_proxy: "{{ https_proxy|default('') }}" diff --git a/roles/contiv/templates/netplugin.j2 b/roles/contiv/templates/netplugin.j2 index f3d26c037..a4928cc3d 100644 --- a/roles/contiv/templates/netplugin.j2 +++ b/roles/contiv/templates/netplugin.j2 @@ -1,9 +1,7 @@ {% if contiv_encap_mode == "vlan" %} NETPLUGIN_ARGS='-vlan-if {{ netplugin_interface }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' {% endif %} -{# Note: Commenting out vxlan encap mode support until it is fully supported {% if contiv_encap_mode == "vxlan" %} -NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -e {{contiv_encap_mode}} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' +NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' {% endif %} -#} diff --git a/roles/contiv_auth_proxy/README.md b/roles/contiv_auth_proxy/README.md new file mode 100644 index 000000000..287b6c148 --- /dev/null +++ b/roles/contiv_auth_proxy/README.md @@ -0,0 +1,29 @@ +Role Name +========= + +Role to install Contiv API Proxy and UI + +Requirements +------------ + +Docker needs to be installed to run the auth proxy container. + +Role Variables +-------------- + +auth_proxy_image specifies the image with version tag to be used to spin up the auth proxy container. +auth_proxy_cert, auth_proxy_key specify files to use for the proxy server certificates. +auth_proxy_port is the host port and auth_proxy_datastore the cluster data store address. 
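A quick way to sanity-check the role's result is to confirm the service is up and that the proxy answers on the configured port. A minimal sketch, assuming the defaults above (port 10000, the self-supplied certificate) and a placeholder host name `netmaster`:

```
# Check the systemd unit the role installs, then probe the UI port.
# -k skips verification because the certificate is typically self-signed.
systemctl is-active auth-proxy
curl -k -s -o /dev/null -w '%{http_code}\n' https://netmaster:10000/
```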
+ +Dependencies +------------ + +docker + +Example Playbook +---------------- + +- hosts: netplugin-node + become: true + roles: + - { role: auth_proxy, auth_proxy_port: 10000, auth_proxy_datastore: etcd://netmaster:22379 } diff --git a/roles/contiv_auth_proxy/defaults/main.yml b/roles/contiv_auth_proxy/defaults/main.yml new file mode 100644 index 000000000..4e637a947 --- /dev/null +++ b/roles/contiv_auth_proxy/defaults/main.yml @@ -0,0 +1,11 @@ +--- +auth_proxy_image: "contiv/auth_proxy:1.0.0-beta.2" +auth_proxy_port: 10000 +contiv_certs: "/var/contiv/certs" +cluster_store: "{{ hostvars[groups['masters'][0]]['ansible_' + netmaster_interface].ipv4.address }}:22379" +auth_proxy_cert: "{{ contiv_certs }}/auth_proxy_cert.pem" +auth_proxy_key: "{{ contiv_certs }}/auth_proxy_key.pem" +auth_proxy_datastore: "{{ cluster_store }}" +auth_proxy_binaries: "/var/contiv_cache" +auth_proxy_local_install: False +auth_proxy_rule_comment: "Contiv auth proxy service" diff --git a/roles/contiv_auth_proxy/files/auth-proxy.service b/roles/contiv_auth_proxy/files/auth-proxy.service new file mode 100644 index 000000000..7cd2edff1 --- /dev/null +++ b/roles/contiv_auth_proxy/files/auth-proxy.service @@ -0,0 +1,13 @@ +[Unit] +Description=Contiv Proxy and UI +After=auditd.service systemd-user-sessions.service time-sync.target docker.service + +[Service] +ExecStart=/usr/bin/auth_proxy.sh start +ExecStop=/usr/bin/auth_proxy.sh stop +KillMode=control-group +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=multi-user.target diff --git a/roles/contiv_auth_proxy/handlers/main.yml b/roles/contiv_auth_proxy/handlers/main.yml new file mode 100644 index 000000000..9cb9bea49 --- /dev/null +++ b/roles/contiv_auth_proxy/handlers/main.yml @@ -0,0 +1,2 @@ +--- +# handlers file for auth_proxy diff --git a/roles/contiv_auth_proxy/tasks/cleanup.yml b/roles/contiv_auth_proxy/tasks/cleanup.yml new file mode 100644 index 000000000..a29659cc9 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/cleanup.yml @@ -0,0 +1,10 @@ +--- + +- name: stop auth-proxy container + service: name=auth-proxy state=stopped + +- name: cleanup iptables for auth proxy + shell: iptables -D INPUT -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" + become: true + with_items: + - "{{ auth_proxy_port }}" diff --git a/roles/contiv_auth_proxy/tasks/main.yml b/roles/contiv_auth_proxy/tasks/main.yml new file mode 100644 index 000000000..74e7bf794 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/main.yml @@ -0,0 +1,37 @@ +--- +# tasks file for auth_proxy +- name: setup iptables for auth proxy + shell: > + ( iptables -L INPUT | grep "{{ auth_proxy_rule_comment }} ({{ item }})" ) || \ + iptables -I INPUT 1 -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" + become: true + with_items: + - "{{ auth_proxy_port }}" + +# Load the auth-proxy-image from local tar. 
Ignore any errors to handle the +# case where the image is not built in +- name: copy auth-proxy image + copy: src={{ auth_proxy_binaries }}/auth-proxy-image.tar dest=/tmp/auth-proxy-image.tar + when: auth_proxy_local_install == True + +- name: load auth-proxy image + shell: docker load -i /tmp/auth-proxy-image.tar + when: auth_proxy_local_install == True + +- name: create cert folder for proxy + file: path=/var/contiv/certs state=directory + +- name: copy shell script for starting auth-proxy + template: src=auth_proxy.j2 dest=/usr/bin/auth_proxy.sh mode=u=rwx,g=rx,o=rx + +- name: copy cert for starting auth-proxy + copy: src=cert.pem dest=/var/contiv/certs/auth_proxy_cert.pem mode=u=rw,g=r,o=r + +- name: copy key for starting auth-proxy + copy: src=key.pem dest=/var/contiv/certs/auth_proxy_key.pem mode=u=rw,g=r,o=r + +- name: copy systemd units for auth-proxy + copy: src=auth-proxy.service dest=/etc/systemd/system/auth-proxy.service + +- name: start auth-proxy container + systemd: name=auth-proxy daemon_reload=yes state=started enabled=yes diff --git a/roles/contiv_auth_proxy/templates/auth_proxy.j2 b/roles/contiv_auth_proxy/templates/auth_proxy.j2 new file mode 100644 index 000000000..e82e5b4ab --- /dev/null +++ b/roles/contiv_auth_proxy/templates/auth_proxy.j2 @@ -0,0 +1,36 @@ +#!/bin/bash + +usage="$0 start/stop" +if [ $# -ne 1 ]; then + echo USAGE: $usage + exit 1 +fi + +case $1 in +start) + set -e + + /usr/bin/docker run --rm \ + -p 10000:{{ auth_proxy_port }} \ + --net=host --name=auth-proxy \ + -e NO_NETMASTER_STARTUP_CHECK=1 \ + -v /var/contiv:/var/contiv \ + {{ auth_proxy_image }} \ + --tls-key-file={{ auth_proxy_key }} \ + --tls-certificate={{ auth_proxy_cert }} \ + --data-store-address={{ auth_proxy_datastore }} \ + --netmaster-address={{ service_vip }}:9999 \ + --listen-address=:10000 + ;; + +stop) + # don't stop on error + /usr/bin/docker stop auth-proxy + /usr/bin/docker rm -f -v auth-proxy + ;; + +*) + echo USAGE: $usage + exit 1 + ;; +esac diff --git a/roles/contiv_auth_proxy/tests/inventory b/roles/contiv_auth_proxy/tests/inventory new file mode 100644 index 000000000..d18580b3c --- /dev/null +++ b/roles/contiv_auth_proxy/tests/inventory @@ -0,0 +1 @@ +localhost
\ No newline at end of file diff --git a/roles/contiv_auth_proxy/tests/test.yml b/roles/contiv_auth_proxy/tests/test.yml new file mode 100644 index 000000000..2af3250cd --- /dev/null +++ b/roles/contiv_auth_proxy/tests/test.yml @@ -0,0 +1,5 @@ +--- +- hosts: localhost + remote_user: root + roles: + - auth_proxy diff --git a/roles/contiv_auth_proxy/vars/main.yml b/roles/contiv_auth_proxy/vars/main.yml new file mode 100644 index 000000000..9032766c4 --- /dev/null +++ b/roles/contiv_auth_proxy/vars/main.yml @@ -0,0 +1,2 @@ +--- +# vars file for auth_proxy diff --git a/roles/contiv_facts/defaults/main.yaml b/roles/contiv_facts/defaults/main.yaml index a6c08fa63..7b8150954 100644 --- a/roles/contiv_facts/defaults/main.yaml +++ b/roles/contiv_facts/defaults/main.yaml @@ -8,3 +8,6 @@ bin_dir: /usr/bin ansible_temp_dir: /tmp/.ansible/files source_type: packageManager + +# Whether or not to also install and enable the Contiv auth_proxy +contiv_enable_auth_proxy: false diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index fa2f44609..586aebb11 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -122,7 +122,8 @@ - include_role: name: etcd_common - tasks_from: etcdctl.yml + vars: + r_etcd_common_action: drop_etcdctl when: openshift_etcd_etcdctl_profile | default(true) | bool - name: Set fact etcd_service_status_changed diff --git a/roles/etcd_common/defaults/main.yml b/roles/etcd_common/defaults/main.yml index 14e712fcf..8cc7a9c20 100644 --- a/roles/etcd_common/defaults/main.yml +++ b/roles/etcd_common/defaults/main.yml @@ -1,8 +1,18 @@ --- +# Default action when calling this role +r_etcd_common_action: noop +r_etcd_common_backup_tag: '' +r_etcd_common_backup_sufix_name: '' + # runc, docker, host r_etcd_common_etcd_runtime: "docker" r_etcd_common_embedded_etcd: false +# etcd run on a host => use etcdctl command directly +# etcd run as a docker container => use docker exec +# etcd run as a runc container => use runc exec +r_etcd_common_etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' or r_etcd_common_embedded_etcd | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}" + # etcd server vars etcd_conf_dir: '/etc/etcd' r_etcd_common_system_container_host_dir: /var/lib/etcd/etcd.etcd diff --git a/roles/etcd_upgrade/tasks/backup.yml b/roles/etcd_common/tasks/backup.yml index 1ea6fc59f..4a4832275 100644 --- a/roles/etcd_upgrade/tasks/backup.yml +++ b/roles/etcd_common/tasks/backup.yml @@ -1,15 +1,11 @@ --- -# INPUT r_etcd_backup_sufix_name -# INPUT r_etcd_backup_tag -# OUTPUT r_etcd_upgrade_backup_complete - set_fact: - # ORIGIN etcd_data_dir etcd_common.defaults - l_etcd_backup_dir: "{{ etcd_data_dir }}/openshift-backup-{{ r_etcd_backup_tag | default('') }}{{ r_etcd_backup_sufix_name }}" + l_etcd_backup_dir: "{{ etcd_data_dir }}/openshift-backup-{{ r_etcd_common_backup_tag }}{{ r_etcd_common_backup_sufix_name }}" # TODO: replace shell module with command and update later checks - name: Check available disk space for etcd backup shell: df --output=avail -k {{ etcd_data_dir }} | tail -n 1 - register: avail_disk + register: l_avail_disk # AUDIT:changed_when: `false` because we are only inspecting # state, not manipulating anything changed_when: false @@ -17,8 +13,8 @@ # TODO: replace shell module with command and update later checks - name: Check current etcd disk usage shell: du --exclude='*openshift-backup*' -k {{ etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage 
- when: r_etcd_upgrade_embedded_etcd | bool + register: l_etcd_disk_usage + when: r_etcd_common_embedded_etcd | bool # AUDIT:changed_when: `false` because we are only inspecting # state, not manipulating anything changed_when: false @@ -26,9 +22,9 @@ - name: Abort if insufficient disk space for etcd backup fail: msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (r_etcd_upgrade_embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + {{ l_etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ l_avail_disk.stdout }} Kb available. + when: (r_etcd_common_embedded_etcd | bool) and (l_etcd_disk_usage.stdout|int > l_avail_disk.stdout|int) # For non containerized and non embedded we should have the correct version of # etcd installed already. So don't do anything. @@ -37,17 +33,22 @@ # # For embedded non containerized we need to ensure we have the latest version # etcd on the host. +- name: Detecting Atomic Host Operating System + stat: + path: /run/ostree-booted + register: l_ostree_booted + - name: Install latest etcd for embedded package: name: etcd state: latest when: - - r_etcd_upgrade_embedded_etcd | bool + - r_etcd_common_embedded_etcd | bool - not l_ostree_booted.stat.exists | bool - name: Generate etcd backup command: > - {{ etcdctl_command }} backup --data-dir={{ etcd_data_dir }} + {{ r_etcd_common_etcdctl_command }} backup --data-dir={{ etcd_data_dir }} --backup-dir={{ l_etcd_backup_dir }} # According to the docs change you can simply copy snap/db @@ -55,16 +56,16 @@ - name: Check for v3 data store stat: path: "{{ etcd_data_dir }}/member/snap/db" - register: v3_db + register: l_v3_db - name: Copy etcd v3 data store command: > cp -a {{ etcd_data_dir }}/member/snap/db {{ l_etcd_backup_dir }}/member/snap/ - when: v3_db.stat.exists + when: l_v3_db.stat.exists - set_fact: - r_etcd_upgrade_backup_complete: True + r_etcd_common_backup_complete: True - name: Display location of etcd backup debug: diff --git a/roles/etcd_common/tasks/etcdctl.yml b/roles/etcd_common/tasks/drop_etcdctl.yml index 6cb456677..6cb456677 100644 --- a/roles/etcd_common/tasks/etcdctl.yml +++ b/roles/etcd_common/tasks/drop_etcdctl.yml diff --git a/roles/etcd_common/tasks/main.yml b/roles/etcd_common/tasks/main.yml new file mode 100644 index 000000000..6ed87e6c7 --- /dev/null +++ b/roles/etcd_common/tasks/main.yml @@ -0,0 +1,9 @@ +--- +- name: Fail if invalid r_etcd_common_action provided + fail: + msg: "etcd_common role can only be called with 'noop' or 'backup' or 'drop_etcdctl'" + when: r_etcd_common_action not in ['noop', 'backup', 'drop_etcdctl'] + +- name: Include main action task file + include: "{{ r_etcd_common_action }}.yml" + when: r_etcd_common_action != "noop" diff --git a/roles/etcd_migrate/README.md b/roles/etcd_migrate/README.md new file mode 100644 index 000000000..369e78ff2 --- /dev/null +++ b/roles/etcd_migrate/README.md @@ -0,0 +1,53 @@ +Role Name +========= + +Offline etcd migration of data from v2 to v3 + +Requirements +------------ + +It is expected all consumers of the etcd data are not accessing the data. +Otherwise the migrated data can be out-of-sync with the v2 and can result in unhealthy etcd cluster. 
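For orientation, a rough sketch of the manual steps this role automates on each member, using the same etcdctl and oadm invocations as the migrate tasks below; the data directory path shown here is an assumption (the role uses `etcd_data_dir`), and `<etcd_peer>` is a placeholder:

```
# Stop the member, migrate the v2 keyspace in place, then bring it back up.
systemctl stop etcd
ETCDCTL_API=3 etcdctl migrate --data-dir=/var/lib/etcd
systemctl start etcd

# Re-attach TTLs as v3 leases; the role does this for both
# /kubernetes.io/events and /kubernetes.io/masterleases.
oadm migrate etcd-ttl \
  --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt \
  --etcd-address https://<etcd_peer>:2379 \
  --ttl-keys-prefix /kubernetes.io/events \
  --lease-duration 1h
```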
+ +The role itself is responsible for: +- checking etcd cluster health and raft status before the migration +- checking of presence of any v3 data (in that case the migration is stopped) +- migration of v2 data to v3 data (including attaching leases of keys prefixed with "/kubernetes.io/events" and "/kubernetes.io/masterleases" string) +- validation of migrated data (all v2 keys and in v3 keys and are set to the identical value) + +The migration itself requires an etcd member to be down in the process. Once the migration is done, the etcd member is started. + +Role Variables +-------------- + +TBD + +Dependencies +------------ + +- etcd_common +- lib_utils + +Example Playbook +---------------- + +```yaml +- name: Migrate etcd data from v2 to v3 + hosts: oo_etcd_to_config + gather_facts: no + tasks: + - include_role: + name: openshift_etcd_migrate + vars: + etcd_peer: "{{ ansible_default_ipv4.address }}" +``` + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +Jan Chaloupka (jchaloup@redhat.com) diff --git a/roles/etcd_migrate/defaults/main.yml b/roles/etcd_migrate/defaults/main.yml new file mode 100644 index 000000000..05cf41fbb --- /dev/null +++ b/roles/etcd_migrate/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# Default action when calling this role, choices: check, migrate, configure +r_etcd_migrate_action: migrate diff --git a/roles/etcd_migrate/meta/main.yml b/roles/etcd_migrate/meta/main.yml new file mode 100644 index 000000000..f3cabbef6 --- /dev/null +++ b/roles/etcd_migrate/meta/main.yml @@ -0,0 +1,17 @@ +--- +galaxy_info: + author: Jan Chaloupka + description: Etcd migration + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.1 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- { role: etcd_common } +- { role: lib_utils } diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml new file mode 100644 index 000000000..2f07713bc --- /dev/null +++ b/roles/etcd_migrate/tasks/check.yml @@ -0,0 +1,55 @@ +--- +# Check the cluster is healthy +- include: check_cluster_health.yml + +# Check if the member has v3 data already +# Run the migration only if the data are v2 +- name: Check if there are any v3 data + command: > + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:2379' get "" --from-key --keys-only -w json --limit 1 + environment: + ETCDCTL_API: 3 + register: l_etcdctl_output + +- fail: + msg: "Unable to get a number of v3 keys" + when: l_etcdctl_output.rc != 0 + +- fail: + msg: "The etcd has at least one v3 key" + when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" + + +# TODO(jchaloup): once the until loop can be used over include/block, +# remove the repetive code +# - until loop not supported over include statement (nor block) +# https://github.com/ansible/ansible/issues/17098 +# - with_items not supported over block + +# Check the cluster status for the first time +- include: check_cluster_status.yml + +# Check the cluster status for the second time +- block: + - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool + + +# Check the cluster status for the third time +- block: 
+ - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd_migrate/tasks/check_cluster_health.yml b/roles/etcd_migrate/tasks/check_cluster_health.yml new file mode 100644 index 000000000..1abd6a32f --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_health.yml @@ -0,0 +1,23 @@ +--- +- name: Check cluster health + command: > + etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://{{ etcd_peer }}:2379 cluster-health + register: etcd_cluster_health + changed_when: false + failed_when: false + +- name: Assume a member is not healthy + set_fact: + etcd_member_healthy: false + +- name: Get member item health status + set_fact: + etcd_member_healthy: true + with_items: "{{ etcd_cluster_health.stdout_lines }}" + when: "(etcd_peer in item) and ('is healthy' in item)" + +- name: Check the etcd cluster health + # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? + fail: + msg: "Etcd member {{ etcd_peer }} is not healthy" + when: not etcd_member_healthy diff --git a/roles/etcd_migrate/tasks/check_cluster_status.yml b/roles/etcd_migrate/tasks/check_cluster_status.yml new file mode 100644 index 000000000..90fe385c1 --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_status.yml @@ -0,0 +1,32 @@ +--- +# etcd_ip originates from etcd_common role +- name: Check cluster status + command: > + etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints 'https://{{ etcd_peer }}:2379' -w json endpoint status + environment: + ETCDCTL_API: 3 + register: l_etcd_cluster_status + +- name: Retrieve raftIndex + set_fact: + etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" + +- block: + # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers + - name: Group all raftIndices into a list + set_fact: + etcd_members_raft_indices: "{{ groups['oo_etcd_to_config'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + + - name: Check the minimum and the maximum of raftIndices is at most 1 + set_fact: + etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" + + - debug: + msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" + + when: inventory_hostname in groups.oo_etcd_to_config[0] + +# The cluster raft status is ok if the difference of the max and min raft index is at most 1 +- name: capture the status + set_fact: + l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_config[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd_migrate/tasks/configure.yml b/roles/etcd_migrate/tasks/configure.yml new file mode 100644 index 000000000..a305d5bf3 --- /dev/null +++ b/roles/etcd_migrate/tasks/configure.yml @@ -0,0 +1,13 @@ +--- +- name: Configure master to use etcd3 storage backend + yedit: + src: /etc/origin/master/master-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: kubernetesMasterConfig.apiServerArguments.storage-backend + value: + - etcd3 + - key: kubernetesMasterConfig.apiServerArguments.storage-media-type + value: + - application/vnd.kubernetes.protobuf diff --git 
a/roles/etcd_migrate/tasks/main.yml b/roles/etcd_migrate/tasks/main.yml new file mode 100644 index 000000000..409b0b613 --- /dev/null +++ b/roles/etcd_migrate/tasks/main.yml @@ -0,0 +1,25 @@ +--- +- name: Fail if invalid r_etcd_migrate_action provided + fail: + msg: "etcd_migrate role can only be called with 'check' or 'migrate' or 'configure'" + when: r_etcd_migrate_action not in ['check', 'migrate', 'configure'] + +- name: Include main action task file + include: "{{ r_etcd_migrate_action }}.yml" + +# 2. migrate v2 datadir into v3: +# ETCDCTL_API=3 ./etcdctl migrate --data-dir=${data_dir} --no-ttl +# backup the etcd datadir first +# Provide a way for an operator to specify transformer + +# 3. re-configure OpenShift master at /etc/origin/master/master-config.yml +# set storage-backend to “etcd3” +# 4. we could leave the master restart to current logic (there is already the code ready (single vs. HA master)) + +# Run +# etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://172.16.186.45:2379 cluster-health +# to check the cluster health (from the etcdctl.sh aliases file) + +# Another assumption: +# - in order to migrate all etcd v2 data into v3, we need to shut down the cluster (let's verify that on Wednesday meeting) +# - diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml new file mode 100644 index 000000000..cb479b0cc --- /dev/null +++ b/roles/etcd_migrate/tasks/migrate.yml @@ -0,0 +1,53 @@ +--- +# Should this be run in a serial manner? +- set_fact: + l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + +- name: Disable etcd members + service: + name: "{{ l_etcd_service }}" + state: stopped + +# Should we skip all TTL keys? 
https://bugzilla.redhat.com/show_bug.cgi?id=1389773 +- name: Migrate etcd data + command: > + etcdctl migrate --data-dir={{ etcd_data_dir }} + environment: + ETCDCTL_API: 3 + register: l_etcdctl_migrate + +# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup +- name: Check the etcd v2 data are correctly migrated + fail: + msg: "Failed to migrate a member" + when: "'finished transforming keys' not in l_etcdctl_migrate.stdout" + +# TODO(jchaloup): start the etcd on a different port so noone can access it +# Once the validation is done +- name: Enable etcd member + service: + name: "{{ l_etcd_service }}" + state: started + +- name: Re-introduce leases (as a replacement for key TTLs) + command: > + oadm migrate etcd-ttl \ + --cert {{ etcd_peer_cert_file }} \ + --key {{ etcd_peer_key_file }} \ + --cacert {{ etcd_peer_ca_file }} \ + --etcd-address 'https://{{ etcd_peer }}:2379' \ + --ttl-keys-prefix {{ item }} \ + --lease-duration 1h + environment: + ETCDCTL_API: 3 + with_items: + - "/kubernetes.io/events" + - "/kubernetes.io/masterleases" + +- set_fact: + r_etcd_migrate_success: true + +- name: Enable etcd member + service: + name: "{{ l_etcd_service }}" + state: started diff --git a/roles/etcd_upgrade/defaults/main.yml b/roles/etcd_upgrade/defaults/main.yml index b61bf526c..61bbba225 100644 --- a/roles/etcd_upgrade/defaults/main.yml +++ b/roles/etcd_upgrade/defaults/main.yml @@ -1,9 +1,3 @@ --- r_etcd_upgrade_action: upgrade r_etcd_upgrade_mechanism: rpm -r_etcd_upgrade_embedded_etcd: false -r_etcd_common_embedded_etcd: "{{ r_etcd_upgrade_embedded_etcd }}" -# etcd run on a host => use etcdctl command directly -# etcd run as a docker container => use docker exec -# etcd run as a runc container => use runc exec -etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' or r_etcd_upgrade_embedded_etcd | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}" diff --git a/roles/etcd_upgrade/tasks/main.yml b/roles/etcd_upgrade/tasks/main.yml index 5178c14e3..129c69d6b 100644 --- a/roles/etcd_upgrade/tasks/main.yml +++ b/roles/etcd_upgrade/tasks/main.yml @@ -2,9 +2,9 @@ # INPUT r_etcd_upgrade_action - name: Fail if invalid etcd_upgrade_action provided fail: - msg: "etcd_upgrade role can only be called with 'upgrade' or 'backup'" + msg: "etcd_upgrade role can only be called with 'upgrade'" when: - - r_etcd_upgrade_action not in ['upgrade', 'backup'] + - r_etcd_upgrade_action not in ['upgrade'] - name: Detecting Atomic Host Operating System stat: diff --git a/roles/lib_openshift/library/oc_atomic_container.py b/roles/lib_openshift/library/oc_atomic_container.py index 1e017a576..91c0d752f 100644 --- a/roles/lib_openshift/library/oc_atomic_container.py +++ b/roles/lib_openshift/library/oc_atomic_container.py @@ -65,8 +65,11 @@ options: # -*- -*- -*- Begin included fragment: ansible/oc_atomic_container.py -*- -*- -*- -# pylint: disable=wrong-import-position,too-many-branches,invalid-name +# pylint: disable=wrong-import-position,too-many-branches,invalid-name,no-name-in-module, import-error import json + +from distutils.version import StrictVersion + from ansible.module_utils.basic import AnsibleModule @@ -191,9 +194,15 @@ def main(): ) # Verify that the platform supports atomic command - rc, _, err = module.run_command('atomic -v', check_rc=False) + rc, version_out, err = module.run_command('atomic -v', check_rc=False) if rc != 0: module.fail_json(msg="Error in 
running atomic command", err=err) + # This module requires atomic version 1.17.2 or later + atomic_version = StrictVersion(version_out.replace('\n', '')) + if atomic_version < StrictVersion('1.17.2'): + module.fail_json( + msg="atomic version 1.17.2+ is required", + err=str(atomic_version)) try: core(module) diff --git a/roles/lib_openshift/library/oc_obj.py b/roles/lib_openshift/library/oc_obj.py index 56af303cc..9b0c0e0e4 100644 --- a/roles/lib_openshift/library/oc_obj.py +++ b/roles/lib_openshift/library/oc_obj.py @@ -90,9 +90,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. required: false default: false aliases: [] diff --git a/roles/lib_openshift/src/ansible/oc_atomic_container.py b/roles/lib_openshift/src/ansible/oc_atomic_container.py index 1a5ab6869..16848e9c6 100644 --- a/roles/lib_openshift/src/ansible/oc_atomic_container.py +++ b/roles/lib_openshift/src/ansible/oc_atomic_container.py @@ -1,8 +1,11 @@ # pylint: skip-file # flake8: noqa -# pylint: disable=wrong-import-position,too-many-branches,invalid-name +# pylint: disable=wrong-import-position,too-many-branches,invalid-name,no-name-in-module, import-error import json + +from distutils.version import StrictVersion + from ansible.module_utils.basic import AnsibleModule @@ -127,9 +130,15 @@ def main(): ) # Verify that the platform supports atomic command - rc, _, err = module.run_command('atomic -v', check_rc=False) + rc, version_out, err = module.run_command('atomic -v', check_rc=False) if rc != 0: module.fail_json(msg="Error in running atomic command", err=err) + # This module requires atomic version 1.17.2 or later + atomic_version = StrictVersion(version_out.replace('\n', '')) + if atomic_version < StrictVersion('1.17.2'): + module.fail_json( + msg="atomic version 1.17.2+ is required", + err=str(atomic_version)) try: core(module) diff --git a/roles/lib_openshift/src/doc/obj b/roles/lib_openshift/src/doc/obj index 4ff912b2d..c6504ed01 100644 --- a/roles/lib_openshift/src/doc/obj +++ b/roles/lib_openshift/src/doc/obj @@ -39,9 +39,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. required: false default: false aliases: [] diff --git a/roles/openshift_cfme/README.md b/roles/openshift_cfme/README.md new file mode 100644 index 000000000..8283afed6 --- /dev/null +++ b/roles/openshift_cfme/README.md @@ -0,0 +1,404 @@ +# OpenShift-Ansible - CFME Role + +# PROOF OF CONCEPT - Alpha Version + +This role is based on the work in the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. For additional literature on configuration specific to +ManageIQ (optional post-installation tasks), visit the project's +[upstream documentation page](http://manageiq.org/docs/get-started/basic-configuration). + +Please submit a +[new issue](https://github.com/openshift/openshift-ansible/issues/new) +if you run into bugs with this role or wish to request enhancements. + +# Important Notes + +This is an early *proof of concept* role to install the Cloud Forms +Management Engine (ManageIQ) on OpenShift Container Platform (OCP). 
+ +* This role is still in **ALPHA STATUS** +* Many options are hard-coded still (ex: NFS setup) +* Not many configurable options yet +* **Should** be ran on a dedicated cluster +* **Will not run** on undersized infra +* The terms *CFME* and *MIQ* / *ManageIQ* are interchangeable + +## Requirements + +**NOTE:** These requirements are copied from the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. + +### Prerequisites: + +* + [OpenShift Origin 1.5](https://docs.openshift.com/container-platform/3.5/welcome/index.html) + or + [higher](https://docs.openshift.com/container-platform/latest/welcome/index.html) + provisioned +* NFS or other compatible volume provider +* A cluster-admin user (created by role if required) + +### Cluster Sizing + +In order to avoid random deployment failures due to resource +starvation, we recommend a minimum cluster size for a **test** +environment. + +| Type | Size | CPUs | Memory | +|----------------|---------|----------|----------| +| Masters | `1+` | `8` | `12GB` | +| Nodes | `2+` | `4` | `8GB` | +| PV Storage | `25GB` | `N/A` | `N/A` | + + +![Basic CFME Deployment](img/CFMEBasicDeployment.png) + +**CFME has hard-requirements for memory. CFME will NOT install if your + infrastructure does not meet or exceed the requirements given + above. Do not run this playbook if you do not have the required + memory, you will just waste your time.** + + +### Other sizing considerations + +* Recommendations assume MIQ will be the **only application running** + on this cluster. +* Alternatively, you can provision an infrastructure node to run + registry/metrics/router/logging pods. +* Each MIQ application pod will consume at least `3GB` of RAM on initial + deployment (blank deployment without providers). +* RAM consumption will ramp up higher depending on appliance use, once + providers are added expect higher resource consumption. + + +### Assumptions + +1) You meet/exceed the [cluster sizing](#cluster-sizing) requirements +1) Your NFS server is on your master host +1) Your PV backing NFS storage volume is mounted on `/exports/` + +Required directories that NFS will export to back the PVs: + +* `/exports/miq-pv0[123]` + +If the required directories are not present at install-time, they will +be created using the recommended permissions per the +[upstream documentation](https://github.com/ManageIQ/manageiq-pods#make-persistent-volumes-to-host-the-miq-database-and-application-data): + +* UID/GID: `root`/`root` +* Mode: `0775` + +**IMPORTANT:** If you are using a separate volume (`/dev/vdX`) for NFS + storage, **ensure** it is mounted on `/exports/` **before** running + this role. + + + +## Role Variables + +Core variables in this role: + +| Name | Default value | Description | +|-------------------------------|---------------|---------------| +| `openshift_cfme_install_app` | `False` | `True`: Install everything and create a new CFME app, `False`: Just install all of the templates and scaffolding | + + +Variables you may override have defaults defined in +[defaults/main.yml](defaults/main.yml). + + +# Important Notes + +This is a **tech preview** status role presently. Use it with the same +caution you would give any other pre-release software. + +**Most importantly** follow this one rule: don't re-run the entrypoint +playbook multiple times in a row without cleaning up after previous +runs if some of the CFME steps have ran. This is a known +flake. Cleanup instructions are provided at the bottom of this README. 
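If you prefer to create the NFS export directories yourself before running the role, a minimal sketch matching the assumptions above (directory names, root ownership, mode 0775):

```
# Pre-create the export directories that back the CFME PVs.
mkdir -p /exports/miq-pv01 /exports/miq-pv02 /exports/miq-pv03
chown root:root /exports/miq-pv0*
chmod 0775 /exports/miq-pv0*
```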
+ + +# Usage + +This section describes the basic usage of this role. All parameters +will use their [default values](defaults/main.yml). + +## Pre-flight Checks + +**IMPORTANT:** As documented above in [the prerequisites](#prerequisites), + you **must already** have your OCP cluster up and running. + +**Optional:** The ManageIQ pod is fairly large (about 1.7 GB) so to +save some spin-up time post-deployment, you can begin pre-pulling the +docker image to each of your nodes now: + +``` +root@node0x # docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +## Getting Started + +1) The *entry point playbook* to install CFME is located in +[the BYO playbooks](../../playbooks/byo/openshift-cfme/config.yml) +directory + +2) Update your existing `hosts` inventory file and ensure the +parameter `openshift_cfme_install_app` is set to `True` under the +`[OSEv3:vars]` block. + +2) Using your existing `hosts` inventory file, run `ansible-playbook` +with the entry point playbook: + +``` +$ ansible-playbook -v -i <INVENTORY_FILE> playbooks/byo/openshift-cfme/config.yml +``` + +## Next Steps + +Once complete, the playbook will let you know: + + +``` +TASK [openshift_cfme : Status update] ********************************************************* +ok: [ho.st.na.me] => { + "msg": "CFME has been deployed. Note that there will be a delay before it is fully initialized.\n" +} +``` + +This will take several minutes (*possibly 10 or more*, depending on +your network connection). However, you can get some insight into the +deployment process during initialization. + +### oc describe pod manageiq-0 + +*Some useful information about the output you will see if you run the +`oc describe pod manageiq-0` command* + +**Readiness probe**s - These will take a while to become +`Healthy`. The initial health probes won't even happen for at least 8 +minutes depending on how long it takes you to pull down the large +images. ManageIQ is a large application so it may take a considerable +amount of time for it to deploy and be marked as `Healthy`. + +If you go to the node you know the application is running on (check +for `Successfully assigned manageiq-0 to <HOST|IP>` in the `describe` +output) you can run a `docker pull` command to monitor the progress of +the image pull: + +``` +[root@cfme-node ~]# docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +Trying to pull repository docker.io/manageiq/manageiq-pods ... +sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a: Pulling from docker.io/manageiq/manageiq-pods +Digest: sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a +Status: Image is up to date for docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +The example above demonstrates the case where the image has been +successfully pulled already. + +If the image isn't completely pulled already then you will see +multiple progress bars detailing each image layer download status. + + +### rsh + +*Useful inspection/progress monitoring techniques with the `oc rsh` +command.* + + +On your master node, switch to the `cfme` project (or whatever you +named it if you overrode the `openshift_cfme_project` variable) and +check on the pod states: + +``` +[root@cfme-master01 ~]# oc project cfme +Now using project "cfme" on server "https://10.10.0.100:8443". 
+ +[root@cfme-master01 ~]# oc get pod +NAME READY STATUS RESTARTS AGE +manageiq-0 0/1 Running 0 14m +memcached-1-3lk7g 1/1 Running 0 14m +postgresql-1-12slb 1/1 Running 0 14m +``` + +Note how the `manageiq-0` pod says `0/1` under the **READY** +column. After some time (depending on your network connection) you'll +be able to `rsh` into the pod to find out more of what's happening in +real time. First, the easy-mode command, run this once `rsh` is +available and then watch until it says `Started Initialize Appliance +Database`: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 journalctl -f -u appliance-initialize.service +``` + +For the full explanation of what this means, and more interactive +inspection techniques, keep reading on. + +To obtain a shell on our `manageiq` pod we use this command: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 bash -l +``` + +The `rsh` command opens a shell in your pod for you. In this case it's +the pod called `manageiq-0`. `systemd` is managing the services in +this pod so we can use the `list-units` command to see what is running +currently: `# systemctl list-units | grep appliance`. + +If you see the `appliance-initialize` service running, this indicates +that basic setup is still in progress. We can monitor the process with +the `journalctl` command like so: + + +``` +[root@manageiq-0 vmdb]# journalctl -f -u appliance-initialize.service +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking deployment status == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: No pre-existing EVM configuration found on region PV +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking for existing data on server PV == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Starting New Deployment == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Applying memcached config == +Jun 14 14:55:53 manageiq-0 appliance-initialize.sh[58]: == Initializing Appliance == +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: create encryption key +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: configuring external database +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: Checking for connections to the database... 
+Jun 14 14:56:09 manageiq-0 appliance-initialize.sh[58]: Create region starting +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: Create region complete +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data backup == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sending incremental file list +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: created directory /persistent/server-deploy/backup/backup_2017_06_14_145816 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/REGION +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/v2_key +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/database.yml +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/GUID +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sent 1330 bytes received 136 bytes 2932.00 bytes/sec +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: total size is 770 speedup is 0.53 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Restoring PV data symlinks == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/REGION symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/config/database.yml symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/certs/v2_key symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/log symlink is already in place, skipping +Jun 14 14:58:28 manageiq-0 systemctl[304]: Removed symlink /etc/systemd/system/multi-user.target.wants/appliance-initialize.service. +Jun 14 14:58:29 manageiq-0 systemd[1]: Started Initialize Appliance Database. +``` + +Most of what we see here (above) is the initial database seeding +process. This process isn't very quick, so be patient. + +At the bottom of the log there is a special line from the `systemctl` +service, `Removed symlink +/etc/systemd/system/multi-user.target.wants/appliance-initialize.service`. The +`appliance-initialize` service is no longer marked as enabled. This +indicates that the base application initialization is complete now. + +We're not done yet though, there are other ancillary services which +run in this pod to support the application. 
*Still in the rsh shell*, +Use the `ps` command to monitor for the `httpd` processes +starting. You will see output similar to the following when that stage +has completed: + +``` +[root@manageiq-0 vmdb]# ps aux | grep http +root 1941 0.0 0.1 249820 7640 ? Ss 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1942 0.0 0.0 250752 6012 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1943 0.0 0.0 250472 5952 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1944 0.0 0.0 250472 5916 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1945 0.0 0.0 250360 5764 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +``` + +Furthermore, you can find other related processes by just looking for +ones with `MIQ` in their name: + +``` +[root@manageiq-0 vmdb]# ps aux | grep miq +root 333 27.7 4.2 555884 315916 ? Sl 14:58 3:59 MIQ Server +root 1976 0.6 4.0 507224 303740 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 1, queue: generic +root 1984 0.6 4.0 507224 304312 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 2, queue: generic +root 1992 0.9 4.0 508252 304888 ? SNl 15:02 0:05 MIQ: MiqPriorityWorker id: 3, queue: generic +root 2000 0.7 4.0 510308 304696 ? SNl 15:02 0:04 MIQ: MiqPriorityWorker id: 4, queue: generic +root 2008 1.2 4.0 514000 303612 ? SNl 15:02 0:07 MIQ: MiqScheduleWorker id: 5 +root 2026 0.2 4.0 517504 303644 ? SNl 15:02 0:01 MIQ: MiqEventHandler id: 6, queue: ems +root 2036 0.2 4.0 518532 303768 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 7, queue: reporting +root 2044 0.2 4.0 519560 303812 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 8, queue: reporting +root 2059 0.2 4.0 528372 303956 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:5000) [MIQ: Web Server Worker] +root 2067 0.9 4.0 529664 305716 ? SNl 15:02 0:05 puma 3.3.0 (tcp://127.0.0.1:3000) [MIQ: Web Server Worker] +root 2075 0.2 4.0 529408 304056 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:4000) [MIQ: Web Server Worker] +root 2329 0.0 0.0 10640 972 ? S+ 15:13 0:00 grep --color=auto -i miq +``` + +Finally, *still in the rsh shell*, to test if the application is +running correctly, we can request the application homepage. If the +page is available the page title will be `ManageIQ: Login`: + +``` +[root@manageiq-0 vmdb]# curl -s -k https://localhost | grep -A2 '<title>' +<title> +ManageIQ: Login +</title> +``` + +**Note:** The `-s` flag makes `curl` operations silent and the `-k` +flag to ignore errors about untrusted certificates. + + + +# Additional Upstream Resources + +Below are some useful resources from the upstream project +documentation. You may find these of value. + +* [Verify Setup Was Successful](https://github.com/ManageIQ/manageiq-pods#verifying-the-setup-was-successful) +* [POD Access And Routes](https://github.com/ManageIQ/manageiq-pods#pod-access-and-routes) +* [Troubleshooting](https://github.com/ManageIQ/manageiq-pods#troubleshooting) + + +# Manual Cleanup + +At this time uninstallation/cleanup is still a manual process. You +will have to follow a few steps to fully remove CFME from your +cluster. + +Delete the project: + +* `oc delete project cfme` + +Delete the PVs: + +* `oc delete pv miq-pv01` +* `oc delete pv miq-pv02` +* `oc delete pv miq-pv03` + +Clean out the old PV data: + +* `cd /exports/` +* `find miq* -type f -delete` +* `find miq* -type d -delete` + +Remove the NFS exports: + +* `rm /etc/exports.d/openshift_cfme.exports` +* `exportfs -ar` + +Delete the user: + +* `oc delete user cfme` + +**NOTE:** The `oc delete project cfme` command will return quickly +however it will continue to operate in the background. 
Continue +running `oc get project` after you've completed the other steps to +monitor the pods and final project termination progress. diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml new file mode 100644 index 000000000..493e1ef68 --- /dev/null +++ b/roles/openshift_cfme/defaults/main.yml @@ -0,0 +1,38 @@ +--- +# Namespace for the CFME project +openshift_cfme_project: cfme +# Namespace/project description +openshift_cfme_project_description: ManageIQ - CloudForms Management Engine +# Basic user assigned the `admin` role for the project +openshift_cfme_user: cfme +# Project system account for enabling privileged pods +openshift_cfme_service_account: "system:serviceaccount:{{ openshift_cfme_project }}:default" +# All the required exports +openshift_cfme_pv_exports: + - miq-pv01 + - miq-pv02 + - miq-pv03 +# PV template files and their created object names +openshift_cfme_pv_data: + - pv_name: miq-pv01 + pv_template: miq-pv-db.yaml + pv_label: CFME DB PV + - pv_name: miq-pv02 + pv_template: miq-pv-region.yaml + pv_label: CFME Region PV + - pv_name: miq-pv03 + pv_template: miq-pv-server.yaml + pv_label: CFME Server PV + +# Tuning parameter to use more than 5 images at once from an ImageStream +openshift_cfme_maxImagesBulkImportedPerRepository: 100 +# Hostname/IP of the NFS server. Currently defaults to first master +openshift_cfme_nfs_server: "{{ groups.nfs.0 }}" +# TODO: Refactor '_install_app' variable. This is just for testing but +# maybe in the future it should control the entire yes/no for CFME. +# +# Whether or not the manageiq app should be initialized ('oc new-app +# --template=manageiq). If False everything UP TO 'new-app' is ran. +openshift_cfme_install_app: False +# Docker image to pull +openshift_cfme_container_image: "docker.io/manageiq/manageiq-pods:app-latest-fine" diff --git a/roles/openshift_cfme/files/miq-template.yaml b/roles/openshift_cfme/files/miq-template.yaml new file mode 100644 index 000000000..8f0d2af38 --- /dev/null +++ b/roles/openshift_cfme/files/miq-template.yaml @@ -0,0 +1,566 @@ +--- +path: /tmp/miq-template-out +data: + apiVersion: v1 + kind: Template + labels: + template: manageiq + metadata: + name: manageiq + annotations: + description: "ManageIQ appliance with persistent storage" + tags: "instant-app,manageiq,miq" + iconClass: "icon-rails" + objects: + - apiVersion: v1 + kind: Secret + metadata: + name: "${NAME}-secrets" + stringData: + pg-password: "${DATABASE_PASSWORD}" + - apiVersion: v1 + kind: Service + metadata: + annotations: + description: "Exposes and load balances ManageIQ pods" + service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' + name: ${NAME} + spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: https + port: 443 + protocol: TCP + targetPort: 443 + selector: + name: ${NAME} + - apiVersion: v1 + kind: Route + metadata: + name: ${NAME} + spec: + host: ${APPLICATION_DOMAIN} + port: + targetPort: https + tls: + termination: passthrough + to: + kind: Service + name: ${NAME} + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-app + annotations: + description: "Keeps track of the ManageIQ image changes" + spec: + dockerImageRepository: "${APPLICATION_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-postgresql + annotations: + description: "Keeps track of the PostgreSQL image changes" + spec: 
+ dockerImageRepository: "${POSTGRESQL_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-memcached + annotations: + description: "Keeps track of the Memcached image changes" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-${DATABASE_SERVICE_NAME}" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${DATABASE_VOLUME_CAPACITY} + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-region" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} + - apiVersion: apps/v1beta1 + kind: "StatefulSet" + metadata: + name: ${NAME} + annotations: + description: "Defines how to deploy the ManageIQ appliance" + spec: + serviceName: "${NAME}" + replicas: "${APPLICATION_REPLICA_COUNT}" + template: + metadata: + labels: + name: ${NAME} + name: ${NAME} + spec: + containers: + - name: manageiq + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" + livenessProbe: + tcpSocket: + port: 443 + initialDelaySeconds: 480 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: / + port: 443 + scheme: HTTPS + initialDelaySeconds: 200 + timeoutSeconds: 3 + ports: + - containerPort: 80 + protocol: TCP + - containerPort: 443 + protocol: TCP + securityContext: + privileged: true + volumeMounts: + - + name: "${NAME}-server" + mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" + env: + - + name: "APPLICATION_INIT_DELAY" + value: "${APPLICATION_INIT_DELAY}" + - + name: "DATABASE_SERVICE_NAME" + value: "${DATABASE_SERVICE_NAME}" + - + name: "DATABASE_REGION" + value: "${DATABASE_REGION}" + - + name: "MEMCACHED_SERVICE_NAME" + value: "${MEMCACHED_SERVICE_NAME}" + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: "POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" + lifecycle: + preStop: + exec: + command: + - /opt/manageiq/container-scripts/sync-pv-data + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. 
+ # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Exposes the memcached server" + spec: + ports: + - + name: "memcached" + port: 11211 + targetPort: 11211 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Defines how to deploy memcached" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "memcached" + from: + kind: "ImageStreamTag" + name: "miq-memcached:${MEMCACHED_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + template: + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + labels: + name: "${MEMCACHED_SERVICE_NAME}" + spec: + volumes: [] + containers: + - + name: "memcached" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" + ports: + - + containerPort: 11211 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 5 + tcpSocket: + port: 11211 + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 30 + tcpSocket: + port: 11211 + volumeMounts: [] + env: + - + name: "MEMCACHED_MAX_MEMORY" + value: "${MEMCACHED_MAX_MEMORY}" + - + name: "MEMCACHED_MAX_CONNECTIONS" + value: "${MEMCACHED_MAX_CONNECTIONS}" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + value: "${MEMCACHED_SLAB_PAGE_SIZE}" + resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" + limits: + memory: "${MEMCACHED_MEM_LIMIT}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Exposes the database server" + spec: + ports: + - + name: "postgresql" + port: 5432 + targetPort: 5432 + selector: + name: "${DATABASE_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Defines how to deploy the database" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "postgresql" + from: + kind: "ImageStreamTag" + name: "miq-postgresql:${POSTGRESQL_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${DATABASE_SERVICE_NAME}" + template: + metadata: + name: "${DATABASE_SERVICE_NAME}" + labels: + name: "${DATABASE_SERVICE_NAME}" + spec: + volumes: + - + name: "miq-pgdb-volume" + persistentVolumeClaim: + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" + containers: + - + name: "postgresql" + image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" + ports: + - + containerPort: 5432 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 15 + exec: + command: + - "/bin/sh" + - "-i" + - "-c" + - "psql -h 127.0.0.1 -U ${POSTGRESQL_USER} -q -d ${POSTGRESQL_DATABASE} -c 'SELECT 1'" + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 60 + tcpSocket: + port: 5432 + volumeMounts: + - + name: "miq-pgdb-volume" + mountPath: "/var/lib/pgsql/data" + env: + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + 
name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: "POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" + limits: + memory: "${POSTGRESQL_MEM_LIMIT}" + + parameters: + - + name: "NAME" + displayName: Name + required: true + description: "The name assigned to all of the frontend objects defined in this template." + value: manageiq + - + name: "DATABASE_SERVICE_NAME" + displayName: "PostgreSQL Service Name" + required: true + description: "The name of the OpenShift Service exposed for the PostgreSQL container." + value: "postgresql" + - + name: "DATABASE_USER" + displayName: "PostgreSQL User" + required: true + description: "PostgreSQL user that will access the database." + value: "root" + - + name: "DATABASE_PASSWORD" + displayName: "PostgreSQL Password" + required: true + description: "Password for the PostgreSQL user." + from: "[a-zA-Z0-9]{8}" + generate: expression + - + name: "DATABASE_NAME" + required: true + displayName: "PostgreSQL Database Name" + description: "Name of the PostgreSQL database accessed." + value: "vmdb_production" + - + name: "DATABASE_REGION" + required: true + displayName: "Application Database Region" + description: "Database region that will be used for application." + value: "0" + - + name: "MEMCACHED_SERVICE_NAME" + required: true + displayName: "Memcached Service Name" + description: "The name of the OpenShift Service exposed for the Memcached container." + value: "memcached" + - + name: "MEMCACHED_MAX_MEMORY" + displayName: "Memcached Max Memory" + description: "Memcached maximum memory for memcached object storage in MB." + value: "64" + - + name: "MEMCACHED_MAX_CONNECTIONS" + displayName: "Memcached Max Connections" + description: "Memcached maximum number of connections allowed." + value: "1024" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + displayName: "Memcached Slab Page Size" + description: "Memcached size of each slab page." + value: "1m" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + displayName: "PostgreSQL Max Connections" + description: "PostgreSQL maximum number of database connections allowed." + value: "100" + - + name: "POSTGRESQL_SHARED_BUFFERS" + displayName: "PostgreSQL Shared Buffer Amount" + description: "Amount of memory dedicated for PostgreSQL shared memory buffers." + value: "256MB" + - + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" + required: true + description: "Minimum amount of memory the Application container will need." + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." 
+ value: "1024Mi" + - + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" + required: true + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" + - + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" + required: true + description: "Maximum amount of memory the Application container can consume." + value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." + value: "256Mi" + - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "POSTGRESQL_IMG_TAG" + displayName: "PostgreSQL Image Tag" + description: "This is the PostgreSQL image tag/version requested to deploy." + value: "postgresql-latest-fine" + - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "MEMCACHED_IMG_TAG" + displayName: "Memcached Image Tag" + description: "This is the Memcached image tag/version requested to deploy." + value: "memcached-latest-fine" + - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "APPLICATION_IMG_TAG" + displayName: "Application Image Tag" + description: "This is the Application image tag/version requested to deploy." + value: "app-latest-fine" + - + name: "APPLICATION_DOMAIN" + displayName: "Application Hostname" + description: "The exposed hostname that will route to the application service, if left blank a value will be defaulted." + value: "" + - + name: "APPLICATION_REPLICA_COUNT" + displayName: "Application Replica Count" + description: "This is the number of Application replicas requested to deploy." + value: "1" + - + name: "APPLICATION_INIT_DELAY" + displayName: "Application Init Delay" + required: true + description: "Delay in seconds before we attempt to initialize the application." + value: "15" + - + name: "APPLICATION_VOLUME_CAPACITY" + displayName: "Application Volume Capacity" + required: true + description: "Volume space available for application data." + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" + - + name: "DATABASE_VOLUME_CAPACITY" + displayName: "Database Volume Capacity" + required: true + description: "Volume space available for database." 
+ value: "15Gi" diff --git a/roles/openshift_cfme/files/openshift_cfme.exports b/roles/openshift_cfme/files/openshift_cfme.exports new file mode 100644 index 000000000..5457d41fc --- /dev/null +++ b/roles/openshift_cfme/files/openshift_cfme.exports @@ -0,0 +1,3 @@ +/exports/miq-pv01 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv02 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv03 *(rw,no_root_squash,no_wdelay) diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml new file mode 100644 index 000000000..476a5e030 --- /dev/null +++ b/roles/openshift_cfme/handlers/main.yml @@ -0,0 +1,42 @@ +--- +###################################################################### +# NOTE: These are duplicated from roles/openshift_master/handlers/main.yml +# +# TODO: Use the consolidated 'openshift_handlers' role once it's ready +# See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782 +###################################################################### + +- name: restart master + systemd: name={{ openshift.common.service_type }}-master state=restarted + when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) + notify: Verify API Server + +- name: restart master api + systemd: name={{ openshift.common.service_type }}-master-api state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + notify: Verify API Server + +- name: restart master controllers + systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + +- name: Verify API Server + # Using curl here since the uri module requires python-httplib2 and + # wait_for port doesn't provide health information. + command: > + curl --silent --tlsv1.2 + {% if openshift.common.version_gte_3_2_or_1_2 | bool %} + --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt + {% else %} + --cacert {{ openshift.common.config_base }}/master/ca.crt + {% endif %} + {{ openshift.master.api_url }}/healthz/ready + args: + # Disables the following warning: + # Consider using get_url or uri module rather than running curl + warn: no + register: api_available_output + until: api_available_output.stdout == 'ok' + retries: 120 + delay: 1 + changed_when: false diff --git a/roles/openshift_cfme/img/CFMEBasicDeployment.png b/roles/openshift_cfme/img/CFMEBasicDeployment.png Binary files differnew file mode 100644 index 000000000..a89c1e325 --- /dev/null +++ b/roles/openshift_cfme/img/CFMEBasicDeployment.png diff --git a/roles/openshift_cfme/meta/main.yml b/roles/openshift_cfme/meta/main.yml new file mode 100644 index 000000000..9200f2c3c --- /dev/null +++ b/roles/openshift_cfme/meta/main.yml @@ -0,0 +1,20 @@ +--- +galaxy_info: + author: Tim Bielawa + description: OpenShift CFME (Manage IQ) Deployer + company: Red Hat, Inc. 
+ license: Apache License, Version 2.0 + min_ansible_version: 2.2 + version: 1.0 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- role: lib_openshift +- role: lib_utils +- role: openshift_common +- role: openshift_master_facts diff --git a/roles/openshift_cfme/tasks/create_pvs.yml b/roles/openshift_cfme/tasks/create_pvs.yml new file mode 100644 index 000000000..7fa7d3997 --- /dev/null +++ b/roles/openshift_cfme/tasks/create_pvs.yml @@ -0,0 +1,36 @@ +--- +# Check for existance and then conditionally: +# - evaluate templates +# - PVs +# +# These tasks idempotently create required CFME PV objects. Do not +# call this file directly. This file is intended to be ran as an +# include that has a 'with_items' attached to it. Hence the use below +# of variables like "{{ item.pv_label }}" + +- name: "Check if the {{ item.pv_label }} template has been created already" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: pv + name: "{{ item.pv_name }}" + register: miq_pv_check + +# Skip all of this if the PV already exists +- block: + - name: "Ensure the {{ item.pv_label }} template is evaluated" + template: + src: "{{ item.pv_template }}.j2" + dest: "{{ template_dir }}/{{ item.pv_template }}" + + - name: "Ensure {{ item.pv_label }} is created" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: pv + name: "{{ item.pv_name }}" + state: present + delete_after: True + files: + - "{{ template_dir }}/{{ item.pv_template }}" + when: + - not miq_pv_check.results.results.0 diff --git a/roles/openshift_cfme/tasks/main.yml b/roles/openshift_cfme/tasks/main.yml new file mode 100644 index 000000000..acbce7232 --- /dev/null +++ b/roles/openshift_cfme/tasks/main.yml @@ -0,0 +1,148 @@ +--- +###################################################################### +# Users, projects, and privileges + +- name: Ensure the CFME user exists + oc_user: + state: present + username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace exists with CFME user as admin + oc_project: + state: present + name: "{{ openshift_cfme_project }}" + display_name: "{{ openshift_cfme_project_description }}" + admin: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace service account is privileged + oc_adm_policy_user: + namespace: "{{ openshift_cfme_project }}" + user: "{{ openshift_cfme_service_account }}" + resource_kind: scc + resource_name: privileged + state: present + +###################################################################### +# NFS + +- name: Ensure the /exports/ directory exists + file: + path: /exports/ + state: directory + mode: 0755 + owner: root + group: root + +- name: Ensure the miq-pv0X export directories exist + file: + path: "/exports/{{ item }}" + state: directory + mode: 0775 + owner: root + group: root + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the NFS exports for CFME PVs exist + copy: + src: openshift_cfme.exports + dest: /etc/exports.d/openshift_cfme.exports + register: nfs_exports_updated + +- name: Ensure the NFS export table is refreshed if exports were added + command: exportfs -ar + when: + - nfs_exports_updated.changed + + +###################################################################### +# Create the required CFME PVs. 
Check out these online docs if you +# need a refresher on includes looping with items: +# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0 +# * http://stackoverflow.com/a/35128533 +# +# TODO: Handle the case where a PV template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- include: create_pvs.yml + with_items: "{{ openshift_cfme_pv_data }}" + +###################################################################### +# CFME App Template +# +# Note, this is different from the create_pvs.yml tasks in that the +# application template does not require any jinja2 evaluation. +# +# TODO: Handle the case where the server template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- name: Check if the CFME Server template has been created already + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: template + name: manageiq + register: miq_server_check + +- name: Copy over CFME Server template + copy: + src: miq-template.yaml + dest: "{{ template_dir }}/miq-template.yaml" + +- name: Ensure the server template was read from disk + debug: + var=r_openshift_cfme_miq_template_content + +- name: Ensure CFME Server Template exists + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: template + name: "manageiq" + state: present + content: "{{ r_openshift_cfme_miq_template_content }}" + +###################################################################### +# Let's do this + +- name: Ensure the CFME Server is created + oc_process: + namespace: "{{ openshift_cfme_project }}" + template_name: manageiq + create: True + register: cfme_new_app_process + run_once: True + when: + # User said to install CFME in their inventory + - openshift_cfme_install_app | bool + # # The server app doesn't exist already + # - not miq_server_check.results.results.0 + +- debug: + var: cfme_new_app_process + +###################################################################### +# Various cleanup steps + +# TODO: Not sure what to do about this right now. Might be able to +# just delete it? This currently warns about "Unable to find +# '<TEMP_DIR>' in expected paths." +- name: Ensure the temporary PV/App templates are erased + file: + path: "{{ item }}" + state: absent + with_fileglob: + - "{{ template_dir }}/*.yaml" + +- name: Ensure the temporary PV/app template directory is erased + file: + path: "{{ template_dir }}" + state: absent + +###################################################################### + +- name: Status update + debug: + msg: > + CFME has been deployed. Note that there will be a delay before + it is fully initialized. 
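After these tasks run it can be useful to spot-check that the objects the role manages actually exist. A minimal verification sketch, assuming the default project name (`cfme`) and the PV names from the role defaults above:

```
# PVs rendered from the miq-pv-*.yaml.j2 templates
oc get pv miq-pv01 miq-pv02 miq-pv03

# The imported server template and, when openshift_cfme_install_app=True,
# the pods created from processing it
oc get template manageiq -n cfme
oc get pods -n cfme
```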
diff --git a/roles/openshift_cfme/tasks/tune_masters.yml b/roles/openshift_cfme/tasks/tune_masters.yml new file mode 100644 index 000000000..02b0f10bf --- /dev/null +++ b/roles/openshift_cfme/tasks/tune_masters.yml @@ -0,0 +1,12 @@ +--- +- name: Ensure bulk image import limit is tuned + yedit: + src: /etc/origin/master/master-config.yaml + key: 'imagePolicyConfig.maxImagesBulkImportedPerRepository' + value: "{{ openshift_cfme_maxImagesBulkImportedPerRepository | int() }}" + state: present + backup: True + notify: + - restart master + +- meta: flush_handlers diff --git a/roles/openshift_cfme/tasks/uninstall.yml b/roles/openshift_cfme/tasks/uninstall.yml new file mode 100644 index 000000000..cba734a0e --- /dev/null +++ b/roles/openshift_cfme/tasks/uninstall.yml @@ -0,0 +1,43 @@ +--- +- include_role: + name: lib_openshift + +- name: Uninstall CFME - ManageIQ + debug: + msg: Uninstalling Cloudforms Management Engine - ManageIQ + +- name: Ensure the CFME project is removed + oc_project: + state: absent + name: "{{ openshift_cfme_project }}" + +- name: Ensure the CFME template is removed + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: absent + kind: template + name: manageiq + +- name: Ensure the CFME PVs are removed + oc_obj: + state: absent + all_namespaces: True + kind: pv + name: "{{ item }}" + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the CFME user is removed + oc_user: + state: absent + username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME NFS Exports are removed + file: + path: /etc/exports.d/openshift_cfme.exports + state: absent + register: nfs_exports_removed + +- name: Ensure the NFS export table is refreshed if exports were removed + command: exportfs -ar + when: + - nfs_exports_removed.changed diff --git a/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 new file mode 100644 index 000000000..b8c3bb277 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv01 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 new file mode 100644 index 000000000..7218773f0 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv02 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 new file mode 100644 index 000000000..7b40b6c69 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv03 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml index 4f25a9c8f..982bd9530 100644 --- 
a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml @@ -48,7 +48,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-app - apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -188,7 +188,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms memcached image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-memcached - apiVersion: v1 kind: "DeploymentConfig" metadata: @@ -272,7 +272,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms postgresql image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-postgresql - apiVersion: v1 kind: "DeploymentConfig" metadata: diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml deleted file mode 100644 index 14bdd1dca..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: cloudforms -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes-app - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml new file mode 100644 index 000000000..250a99b8d --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv01 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml deleted file mode 100644 index 709d8d976..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: nfs-pv01 -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml new file mode 100644 index 000000000..cba9bbe35 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv02 + server: <your-nfs-host-here> + 
persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml new file mode 100644 index 000000000..c08c21265 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv03 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml index 4f25a9c8f..3bc6c5813 100644 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml @@ -17,6 +17,7 @@ objects: service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' name: ${NAME} spec: + clusterIP: None ports: - name: http port: 80 @@ -48,11 +49,27 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: "${APPLICATION_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-postgresql + annotations: + description: "Keeps track of changes in the CloudForms postgresql image" + spec: + dockerImageRepository: "${POSTGRESQL_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-memcached + annotations: + description: "Keeps track of changes in the CloudForms memcached image" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${DATABASE_SERVICE_NAME} + name: "${NAME}-${DATABASE_SERVICE_NAME}" spec: accessModes: - ReadWriteOnce @@ -62,45 +79,41 @@ objects: - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${NAME} + name: "${NAME}-region" spec: accessModes: - ReadWriteOnce resources: requests: - storage: ${APPLICATION_VOLUME_CAPACITY} -- apiVersion: v1 - kind: "DeploymentConfig" + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} +- apiVersion: apps/v1beta1 + kind: "StatefulSet" metadata: name: ${NAME} annotations: description: "Defines how to deploy the CloudForms appliance" spec: + serviceName: "${NAME}" + replicas: 1 template: metadata: labels: name: ${NAME} name: ${NAME} spec: - volumes: - - - name: "cfme-app-volume" - persistentVolumeClaim: - claimName: ${NAME} containers: - - image: cloudforms/cfme-openshift-app:${APPLICATION_IMG_TAG} - imagePullPolicy: IfNotPresent - name: cloudforms + - name: cloudforms + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" livenessProbe: - httpGet: - path: / - port: 80 + tcpSocket: + port: 443 initialDelaySeconds: 480 timeoutSeconds: 3 readinessProbe: httpGet: path: / - port: 80 + port: 443 + scheme: HTTPS initialDelaySeconds: 200 timeoutSeconds: 3 ports: @@ -112,8 +125,11 @@ objects: privileged: true volumeMounts: - - name: "cfme-app-volume" + name: "${NAME}-server" mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" env: - name: "APPLICATION_INIT_DELAY" @@ -144,29 +160,32 @@ objects: value: 
"${POSTGRESQL_SHARED_BUFFERS}" resources: requests: - memory: "${MEMORY_APPLICATION_MIN}" + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" lifecycle: preStop: exec: command: - /opt/rh/cfme-container-scripts/sync-pv-data - replicas: 1 - selector: - name: ${NAME} - triggers: - - type: "ConfigChange" - - type: "ImageChange" - imageChangeParams: - automatic: true - containerNames: - - "cloudforms" - from: - kind: "ImageStreamTag" - name: "cfme-openshift-app:${APPLICATION_IMG_TAG}" - strategy: - type: "Recreate" - recreateParams: - timeoutSeconds: 1200 + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. + # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" - apiVersion: v1 kind: "Service" metadata: @@ -182,14 +201,6 @@ objects: selector: name: "${MEMCACHED_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-memcached - annotations: - description: "Keeps track of changes in the CloudForms memcached image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${MEMCACHED_SERVICE_NAME}" @@ -223,7 +234,7 @@ objects: containers: - name: "memcached" - image: "cloudforms/cfme-openshift-memcached:${MEMCACHED_IMG_TAG}" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" ports: - containerPort: 11211 @@ -249,8 +260,11 @@ objects: name: "MEMCACHED_SLAB_PAGE_SIZE" value: "${MEMCACHED_SLAB_PAGE_SIZE}" resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" limits: - memory: "${MEMORY_MEMCACHED_LIMIT}" + memory: "${MEMCACHED_MEM_LIMIT}" - apiVersion: v1 kind: "Service" metadata: @@ -266,14 +280,6 @@ objects: selector: name: "${DATABASE_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-postgresql - annotations: - description: "Keeps track of changes in the CloudForms postgresql image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${DATABASE_SERVICE_NAME}" @@ -307,11 +313,11 @@ objects: - name: "cfme-pgdb-volume" persistentVolumeClaim: - claimName: ${DATABASE_SERVICE_NAME} + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" containers: - name: "postgresql" - image: "cloudforms/cfme-openshift-postgresql:${POSTGRESQL_IMG_TAG}" + image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" ports: - containerPort: 5432 @@ -350,8 +356,11 @@ objects: name: "POSTGRESQL_SHARED_BUFFERS" value: "${POSTGRESQL_SHARED_BUFFERS}" resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" limits: - memory: "${MEMORY_POSTGRESQL_LIMIT}" + memory: "${POSTGRESQL_MEM_LIMIT}" parameters: - @@ -420,36 +429,87 @@ parameters: name: "POSTGRESQL_SHARED_BUFFERS" displayName: "PostgreSQL Shared Buffer Amount" description: "Amount of memory dedicated for PostgreSQL shared memory buffers." 
- value: "64MB" + value: "256MB" - - name: "MEMORY_APPLICATION_MIN" - displayName: "Application Memory Minimum" + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" required: true description: "Minimum amount of memory the Application container will need." - value: "4096Mi" + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." + value: "1024Mi" - - name: "MEMORY_POSTGRESQL_LIMIT" - displayName: "PostgreSQL Memory Limit" + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" required: true - description: "Maximum amount of memory the PostgreSQL container can use." - value: "2048Mi" + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" - - name: "MEMORY_MEMCACHED_LIMIT" - displayName: "Memcached Memory Limit" + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" required: true - description: "Maximum amount of memory the Memcached container can use." + description: "Maximum amount of memory the Application container can consume." + value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." value: "256Mi" - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql" + - name: "POSTGRESQL_IMG_TAG" displayName: "PostgreSQL Image Tag" description: "This is the PostgreSQL image tag/version requested to deploy." value: "latest" - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-memcached" + - name: "MEMCACHED_IMG_TAG" displayName: "Memcached Image Tag" description: "This is the Memcached image tag/version requested to deploy." value: "latest" - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-app" + - name: "APPLICATION_IMG_TAG" displayName: "Application Image Tag" description: "This is the Application image tag/version requested to deploy." @@ -464,16 +524,22 @@ parameters: displayName: "Application Init Delay" required: true description: "Delay in seconds before we attempt to initialize the application." 
- value: "30" + value: "15" - name: "APPLICATION_VOLUME_CAPACITY" displayName: "Application Volume Capacity" required: true description: "Volume space available for application data." - value: "1Gi" + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" - name: "DATABASE_VOLUME_CAPACITY" displayName: "Database Volume Capacity" required: true description: "Volume space available for database." - value: "1Gi" + value: "15Gi" diff --git a/roles/openshift_excluder/tasks/install.yml b/roles/openshift_excluder/tasks/install.yml index d09358bee..3a866cedf 100644 --- a/roles/openshift_excluder/tasks/install.yml +++ b/roles/openshift_excluder/tasks/install.yml @@ -1,14 +1,24 @@ --- -- name: Install docker excluder - package: - name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_docker_package_state }}" - when: - - r_openshift_excluder_enable_docker_excluder | bool - -- name: Install openshift excluder - package: - name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_package_state }}" - when: - - r_openshift_excluder_enable_openshift_excluder | bool + +- when: + - not openshift.common.is_atomic | bool + - r_openshift_excluder_install_ran is not defined + + block: + + - name: Install docker excluder + package: + name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_docker_package_state }}" + when: + - r_openshift_excluder_enable_docker_excluder | bool + + - name: Install openshift excluder + package: + name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_package_state }}" + when: + - r_openshift_excluder_enable_openshift_excluder | bool + + - set_fact: + r_openshift_excluder_install_ran: True diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index cfe092a28..0788ddfb0 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -1654,6 +1654,7 @@ def set_proxy_facts(facts): common['no_proxy'].extend(common['no_proxy_internal_hostnames'].split(',')) # We always add local dns domain and ourselves no matter what common['no_proxy'].append('.' 
+ common['dns_domain']) + common['no_proxy'].append('.svc') common['no_proxy'].append(common['hostname']) common['no_proxy'] = ','.join(sort_unique(common['no_proxy'])) facts['common'] = common diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 1b9bda67e..50ed3e964 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -24,12 +24,18 @@ msg: | openshift-ansible requires Python 3 for {{ ansible_distribution }}; For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html - when: ansible_distribution == 'Fedora' and ansible_python['version']['major'] != 3 + when: + - ansible_distribution == 'Fedora' + - ansible_python['version']['major'] != 3 + - r_openshift_facts_ran is not defined - name: Validate python version fail: msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" - when: ansible_distribution != 'Fedora' and ansible_python['version']['major'] != 2 + when: + - ansible_distribution != 'Fedora' + - ansible_python['version']['major'] != 2 + - r_openshift_facts_ran is not defined # Fail as early as possible if Atomic and old version of Docker - block: @@ -48,7 +54,9 @@ that: - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') - when: l_is_atomic | bool + when: + - l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Load variables include_vars: "{{ item }}" @@ -59,7 +67,9 @@ - name: Ensure various deps are installed package: name={{ item }} state=present with_items: "{{ required_packages }}" - when: not l_is_atomic | bool + when: + - not l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Ensure various deps for running system containers are installed package: name={{ item }} state=present @@ -67,6 +77,7 @@ when: - not l_is_atomic | bool - l_any_system_container | bool + - r_openshift_facts_ran is not defined - name: Gather Cluster facts and set is_containerized if needed openshift_facts: @@ -99,3 +110,7 @@ - name: Set repoquery command set_fact: repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" + +- name: Register that this already ran + set_fact: + r_openshift_facts_ran: True diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..26bf4c09b 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. 
@@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,55 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. 
+ """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + image_tag = get_var(task_vars, "openshift_image_tag") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -124,7 +132,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) if result.get("failed", False): return False @@ -132,6 +141,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -147,17 +157,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" + + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + for registry in registries: + args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)} + result = self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not 
result.get("failed"): + return True - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..2bd615457 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -34,7 +34,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars) + info = self.execute_module("docker_info", {}, task_vars=task_vars) if info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -146,7 +146,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. 
This is about the same anyway: - pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") - result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) msg = result.get("msg", "") if result.get("failed"): if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..0dd2b1286 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - "cockpit-kubernetes", - "cockpit-shell", + "cockpit-system", "cockpit-ws", "etcd", "httpd-tools", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version. 
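
For readers following the DockerImageAvailability changes above: required_images() builds each expected image name from openshift_deployment_type, openshift_image_tag and the host's group_names, lets oreg_url override the default namespace/name-${component}:${version} template, and is_available_skopeo_image() then falls back to running skopeo inspect against each known registry. A minimal group_vars sketch of the inventory variables the check reads is shown below; the foo.io registry and all concrete values are illustrative assumptions (only the variable names come from the code above), and it is likewise assumed that openshift_docker_additional_registries is what populates openshift.docker.additional_registries for known_docker_registries().

# Hypothetical group_vars/OSEv3.yml sketch -- values are examples, not recommendations
openshift_deployment_type: origin            # selects the origin image naming scheme
openshift_image_tag: v3.6.0                  # substituted for ${version} in each image name
# Optional override; ${component} is replaced per image (pod, deployer, docker-registry, ...)
oreg_url: 'foo.io/openshift/origin-${component}:${version}'
# Extra registries surfaced to the skopeo fallback
openshift_docker_additional_registries:
  - foo.io
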
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -177,8 +177,85 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 
'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. + 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..876614b1d 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -77,7 +77,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name == "yum": return {} if module_name != "docker_info": @@ -187,7 +187,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 751489958..d895e9a68 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,6 +124,35 @@ edits: "{{ openshift_hosted_registry_edits }}" force: "{{ True|bool in openshift_hosted_registry_force }}" +- name: Ensure OpenShift registry correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ 
openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + failed_when: false + +- name: Determine the latest version of the OpenShift registry deployment + command: | + oc get deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_registry_latest_version + +- name: Sanity-check that the OpenShift registry rolled out correctly + command: | + oc get replicationcontroller {{ openshift_hosted_registry_name }}-{{ openshift_hosted_registry_latest_version.stdout }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_registry_rc_phase + until: "'Running' not in openshift_hosted_registry_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_registry_rc_phase.stdout" + - include: storage/glusterfs.yml when: - openshift.hosted.registry.storage.kind | default(none) == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap diff --git a/roles/openshift_hosted/tasks/registry/storage/glusterfs.yml b/roles/openshift_hosted/tasks/registry/storage/glusterfs.yml index e6bb196b8..c504bfb80 100644 --- a/roles/openshift_hosted/tasks/registry/storage/glusterfs.yml +++ b/roles/openshift_hosted/tasks/registry/storage/glusterfs.yml @@ -35,7 +35,7 @@ mount: state: mounted fstype: glusterfs - src: "{{ groups.oo_glusterfs_to_config[0] }}:/{{ openshift.hosted.registry.storage.glusterfs.path }}" + src: "{% if 'glusterfs_registry' in groups %}{{ groups.glusterfs_registry[0] }}{% else %}{{ groups.glusterfs[0] }}{% endif %}:/{{ openshift.hosted.registry.storage.glusterfs.path }}" name: "{{ mktemp.stdout }}" - name: Set registry volume permissions diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 192afc87a..160ae2f5e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -55,7 +55,7 @@ state: present with_items: "{{ openshift_hosted_routers }}" -- name: Grant the router serivce account(s) access to the appropriate scc +- name: Grant the router service account(s) access to the appropriate scc oc_adm_policy_user: user: "system:serviceaccount:{{ item.namespace }}:{{ item.serviceaccount }}" namespace: "{{ item.namespace }}" @@ -89,18 +89,37 @@ ports: "{{ item.ports }}" stats_port: "{{ item.stats_port }}" with_items: "{{ openshift_hosted_routers }}" - register: routerout -# This should probably move to module -- name: wait for deploy - pause: - seconds: 30 - when: routerout.changed +- name: Ensure OpenShift router correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace | default('default') }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + with_items: "{{ openshift_hosted_routers }}" + failed_when: false -- name: Ensure router replica count matches desired - oc_scale: - kind: dc - name: "{{ item.name | default('router') }}" - namespace: "{{ item.namespace | default('default') }}" - replicas: "{{ item.replicas }}" +- name: Determine the latest version of the OpenShift router deployment + command: | + oc get deploymentconfig {{ 
item.name }} \ + --namespace {{ item.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_routers_latest_version with_items: "{{ openshift_hosted_routers }}" + +- name: Poll for OpenShift router deployment success + command: | + oc get replicationcontroller {{ item.0.name }}-{{ item.1.stdout }} \ + --namespace {{ item.0.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_router_rc_phase + until: "'Running' not in openshift_hosted_router_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_router_rc_phase.stdout" + with_together: + - "{{ openshift_hosted_routers }}" + - "{{ openshift_hosted_routers_latest_version.results }}" diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 0c60ef6fd..dd0f22d4b 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -55,6 +55,9 @@ When both `openshift_logging_install_logging` and `openshift_logging_upgrade_log - `openshift_logging_fluentd_use_journal`: NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver when using the default of empty. - `openshift_logging_fluentd_journal_read_from_head`: If empty, Fluentd will use its internal default, which is false. - `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all']. +- `openshift_logging_fluentd_buffer_queue_limit`: Buffer queue limit for Fluentd. Defaults to 1024. +- `openshift_logging_fluentd_buffer_size_limit`: Buffer chunk limit for Fluentd. Defaults to 1m. + - `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'. - `openshift_logging_es_port`: The port for the ES service Fluentd should sent its logs to. Defaults to '9200'. @@ -155,3 +158,5 @@ Elasticsearch OPS too, if using an OPS cluster: - `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to create for _external_ mux clients to associate with their logs - users will need to set this +- `openshift_logging_mux_buffer_queue_limit`: Default `[1024]` - Buffer queue limit for Mux. +- `openshift_logging_mux_buffer_size_limit`: Default `[1m]` - Buffer chunk limit for Mux. 
diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 3c343c9dc..66d880d23 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -76,6 +76,8 @@ openshift_logging_fluentd_use_journal: "{{ openshift_hosted_logging_use_journal openshift_logging_fluentd_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}" openshift_logging_fluentd_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}" openshift_logging_fluentd_hosts: ['--all'] +openshift_logging_fluentd_buffer_queue_limit: 1024 +openshift_logging_fluentd_buffer_size_limit: 1m openshift_logging_es_host: logging-es openshift_logging_es_port: 9200 diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 7c1062b77..66dc0e096 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -119,6 +119,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_together: - "{{ openshift_logging_facts.elasticsearch_ops.deploymentconfigs }}" @@ -141,6 +147,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }} when: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index f1d15b76d..684dbe0a0 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -269,6 +269,75 @@ - "{{ tempdir }}/templates/logging-es-dc.yml" delete_after: true +- name: Retrieving the cert to use when generating secrets for the {{ es_component }} component + slurp: + src: "{{ generated_certs_dir }}/{{ item.file }}" + register: key_pairs + with_items: + - { name: "ca_file", file: "ca.crt" } + - { name: "es_key", file: "system.logging.es.key" } + - { name: "es_cert", file: "system.logging.es.crt" } + when: openshift_logging_es_allow_external | bool + +- set_fact: + es_key: "{{ 
lookup('file', openshift_logging_es_key) | b64encode }}" + when: + - openshift_logging_es_key | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_cert: "{{ lookup('file', openshift_logging_es_cert) | b64encode }}" + when: + - openshift_logging_es_cert | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ lookup('file', openshift_logging_es_ca_ext) | b64encode }}" + when: + - openshift_logging_es_ca_ext | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ key_pairs | entry_from_named_pair('ca_file') }}" + when: + - es_ca is not defined + - openshift_logging_es_allow_external | bool + changed_when: false + +- name: Generating Elasticsearch {{ es_component }} route template + template: + src: route_reencrypt.j2 + dest: "{{mktemp.stdout}}/templates/logging-{{ es_component }}-route.yaml" + vars: + obj_name: "logging-{{ es_component }}" + route_host: "{{ openshift_logging_es_hostname }}" + service_name: "logging-{{ es_component }}" + tls_key: "{{ es_key | default('') | b64decode }}" + tls_cert: "{{ es_cert | default('') | b64decode }}" + tls_ca_cert: "{{ es_ca | b64decode }}" + tls_dest_ca_cert: "{{ key_pairs | entry_from_named_pair('ca_file') | b64decode }}" + edge_term_policy: "{{ openshift_logging_es_edge_term_policy | default('') }}" + labels: + component: support + logging-infra: support + provider: openshift + changed_when: no + when: openshift_logging_es_allow_external | bool + +# This currently has an issue if the host name changes +- name: Setting Elasticsearch {{ es_component }} route + oc_obj: + state: present + name: "logging-{{ es_component }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + kind: route + files: + - "{{ tempdir }}/templates/logging-{{ es_component }}-route.yaml" + when: openshift_logging_es_allow_external | bool + ## Placeholder for migration when necessary ## - name: Delete temp directory diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 index 377abe21f..38948ba2f 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 @@ -35,6 +35,12 @@ appender: layout: type: consolePattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false file: type: dailyRollingFile @@ -43,6 +49,12 @@ appender: layout: type: pattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files. 
# For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 index 690b5c097..141967c33 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 @@ -17,6 +17,7 @@ node: name: ${DC_NAME} master: ${IS_MASTER} data: ${HAS_DATA} + max_local_storage_nodes: 1 network: host: 0.0.0.0 diff --git a/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 new file mode 100644 index 000000000..cf8a9e65f --- /dev/null +++ b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 @@ -0,0 +1,36 @@ +apiVersion: "v1" +kind: "Route" +metadata: + name: "{{obj_name}}" +{% if labels is defined%} + labels: +{% for key, value in labels.iteritems() %} + {{key}}: {{value}} +{% endfor %} +{% endif %} +spec: + host: {{ route_host }} + tls: +{% if tls_key is defined and tls_key | length > 0 %} + key: | +{{ tls_key|indent(6, true) }} +{% if tls_cert is defined and tls_cert | length > 0 %} + certificate: | +{{ tls_cert|indent(6, true) }} +{% endif %} +{% endif %} + caCertificate: | +{% for line in tls_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + destinationCACertificate: | +{% for line in tls_dest_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + termination: reencrypt +{% if edge_term_policy is defined and edge_term_policy | length > 0 %} + insecureEdgeTerminationPolicy: {{ edge_term_policy }} +{% endif %} + to: + kind: Service + name: {{ service_name }} diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index e185938e3..a5695ee26 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -93,6 +93,14 @@ spec: value: "{{ openshift_logging_fluentd_journal_source | default('') }}" - name: "JOURNAL_READ_FROM_HEAD" value: "{{ openshift_logging_fluentd_journal_read_from_head | lower }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_size_limit }}" + - name: "FLUENTD_CPU_LIMIT" + value: "{{ openshift_logging_fluentd_cpu_limit }}" + - name: "FLUENTD_MEMORY_LIMIT" + value: "{{ openshift_logging_fluentd_memory_limit }}" volumes: - name: runlogjournal hostPath: diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 10fa4372c..77e47d38c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -10,7 +10,9 @@ openshift_logging_mux_namespace: logging ### Common settings openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}" openshift_logging_mux_cpu_limit: 500m -openshift_logging_mux_memory_limit: 1Gi +openshift_logging_mux_memory_limit: 2Gi +openshift_logging_mux_buffer_queue_limit: 1024 +openshift_logging_mux_buffer_size_limit: 1m openshift_logging_mux_replicas: 1 diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 502cd3347..243698c6a 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ 
b/roles/openshift_logging_mux/templates/mux.j2 @@ -103,6 +103,14 @@ spec: value: "true" - name: MUX_ALLOW_EXTERNAL value: "{{ openshift_logging_mux_allow_external | default('false') }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_mux_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_mux_buffer_size_limit }}" + - name: "MUX_CPU_LIMIT" + value: "{{ openshift_logging_mux_cpu_limit }}" + - name: "MUX_MEMORY_LIMIT" + value: "{{ openshift_logging_mux_memory_limit }}" volumes: - name: config configMap: diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml index 14a1daf6c..2d3ce5bcd 100644 --- a/roles/openshift_master/defaults/main.yml +++ b/roles/openshift_master/defaults/main.yml @@ -1,4 +1,4 @@ --- openshift_node_ips: [] -# TODO: update setting these values based on the facts -#openshift_version: "{{ openshift_pkg_version | default(openshift_image_tag | default(openshift.docker.openshift_image_tag | default(''))) }}" +r_openshift_master_clean_install: false +r_openshift_master_etcd3_storage: false diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index aed5598c0..86532cd0a 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -128,6 +128,9 @@ when: openshift.master.request_header_ca is defined and item.kind == 'RequestHeaderIdentityProvider' and item.clientCA | default('') != '' with_items: "{{ openshift.master.identity_providers }}" +- set_fact: + openshift_push_via_dns: "{{ openshift_use_dnsmasq | default(true) and openshift.common.version_gte_3_6 and r_openshift_master_clean_install }}" + - name: Install the systemd units include: systemd_units.yml diff --git a/roles/openshift_master/templates/atomic-openshift-master.j2 b/roles/openshift_master/templates/atomic-openshift-master.j2 index 6e2439fd9..850fae0e4 100644 --- a/roles/openshift_master/templates/atomic-openshift-master.j2 +++ b/roles/openshift_master/templates/atomic-openshift-master.j2 @@ -1,5 +1,8 @@ OPTIONS=--loglevel={{ openshift.master.debug_level | default(2) }} CONFIG_FILE={{ openshift_master_config_file }} +{% if openshift_push_via_dns | default(false) %} +OPENSHIFT_DEFAULT_REGISTRY=docker-registry.default.svc:5000 +{% endif %} {% if openshift.common.is_containerized | bool %} IMAGE_VERSION={{ openshift_image_tag }} {% endif %} diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index 1935d9592..6c26e5092 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -139,6 +139,12 @@ kubernetesMasterConfig: - v1 {% endif %} apiServerArguments: {{ openshift.master.api_server_args | default(None) | to_padded_yaml( level=2 ) }} +{% if r_openshift_master_etcd3_storage or ( r_openshift_master_clean_install and openshift.common.version_gte_3_6 ) %} + storage-backend: + - etcd3 + storage-media-type: + - application/vnd.kubernetes.protobuf +{% endif %} controllerArguments: {{ openshift.master.controller_args | default(None) | to_padded_yaml( level=2 ) }} masterCount: {{ openshift.master.master_count if openshift.master.cluster_method | default(None) == 'native' else 1 }} masterIP: {{ openshift.common.ip }} diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md index 84503217b..1f10de4a2 100644 --- a/roles/openshift_metrics/README.md +++ b/roles/openshift_metrics/README.md @@ -68,6 +68,9 @@ For default 
values, see [`defaults/main.yaml`](defaults/main.yaml). - `openshift_metrics_resolution`: How often metrics should be gathered. +- `openshift_metrics_install_hawkular_agent`: Install the Hawkular OpenShift Agent (HOSA). HOSA can be used + to collect custom metrics from your pods. This component is currently in tech-preview and is not installed by default. + ## Additional variables to control resource limits Each metrics component (hawkular, cassandra, heapster) can specify a cpu and memory limits and requests by setting the corresponding role variable: diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml index 467db34c8..c34936930 100644 --- a/roles/openshift_metrics/defaults/main.yaml +++ b/roles/openshift_metrics/defaults/main.yaml @@ -16,7 +16,7 @@ openshift_metrics_hawkular_nodeselector: "" openshift_metrics_cassandra_replicas: 1 openshift_metrics_cassandra_storage_type: "{{ openshift_hosted_metrics_storage_kind | default('emptydir') }}" openshift_metrics_cassandra_pvc_size: "{{ openshift_hosted_metrics_storage_volume_size | default('10Gi') }}" -openshift_metrics_cassandra_pv_selector: "{{ openshift_hosted_metrics_storage_labels | default(null) }}" +openshift_metrics_cassandra_pv_selector: "{{ openshift_hosted_metrics_storage_labels | default('') }}" openshift_metrics_cassandra_limits_memory: 2G openshift_metrics_cassandra_limits_cpu: null openshift_metrics_cassandra_requests_memory: 1G @@ -31,6 +31,14 @@ openshift_metrics_heapster_requests_memory: 0.9375G openshift_metrics_heapster_requests_cpu: null openshift_metrics_heapster_nodeselector: "" +openshift_metrics_install_hawkular_agent: False +openshift_metrics_hawkular_agent_limits_memory: null +openshift_metrics_hawkular_agent_limits_cpu: null +openshift_metrics_hawkular_agent_requests_memory: null +openshift_metrics_hawkular_agent_requests_cpu: null +openshift_metrics_hawkular_agent_nodeselector: "" +openshift_metrics_hawkular_agent_namespace: "default" + openshift_metrics_hawkular_hostname: "hawkular-metrics.{{openshift_master_default_subdomain}}" openshift_metrics_duration: 7 diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index fb4fe2f03..7b81b3c10 100644 --- a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -73,6 +73,8 @@ {{ hawkular_secrets['hawkular-metrics.key'] }} tls.truststore.crt: > {{ hawkular_secrets['hawkular-cassandra.crt'] }} + ca.crt: > + {{ hawkular_secrets['ca.crt'] }} when: name not in metrics_secrets.stdout_lines changed_when: no diff --git a/roles/openshift_metrics/tasks/install_hosa.yaml b/roles/openshift_metrics/tasks/install_hosa.yaml new file mode 100644 index 000000000..cc533a68b --- /dev/null +++ b/roles/openshift_metrics/tasks/install_hosa.yaml @@ -0,0 +1,44 @@ +--- +- name: Generate Hawkular Agent (HOSA) Cluster Role + template: + src: hawkular_openshift_agent_role.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-role.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Service Account + template: + src: hawkular_openshift_agent_sa.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-sa.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Daemon Set + template: + src: hawkular_openshift_agent_ds.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-ds.yaml" + vars: + 
node_selector: "{{openshift_metrics_hawkular_agent_nodeselector | default('') }}" + changed_when: no + +- name: Generate the Hawkular Agent (HOSA) Configmap + template: + src: hawkular_openshift_agent_cm.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-cm.yaml" + changed_when: no + +- name: Generate role binding for the hawkular-openshift-agent service account + template: + src: rolebinding.j2 + dest: "{{ mktemp.stdout }}/templates/metrics-hawkular-agent-rolebinding.yaml" + vars: + cluster: True + obj_name: hawkular-openshift-agent-rb + labels: + metrics-infra: hawkular-agent + roleRef: + kind: ClusterRole + name: hawkular-openshift-agent + subjects: + - kind: ServiceAccount + name: hawkular-openshift-agent + namespace: "{{openshift_metrics_hawkular_agent_namespace}}" + changed_when: no diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 74eb56713..fdf4ae57f 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -16,11 +16,19 @@ include: install_heapster.yaml when: openshift_metrics_heapster_standalone | bool -- find: paths={{ mktemp.stdout }}/templates patterns=*.yaml +- name: Install Hawkular OpenShift Agent (HOSA) + include: install_hosa.yaml + when: openshift_metrics_install_hawkular_agent | default(false) | bool + +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^(?!metrics-hawkular-openshift-agent).*.yaml" + use_regex: true register: object_def_files changed_when: no -- slurp: src={{item.path}} +- slurp: + src: "{{item.path}}" register: object_defs with_items: "{{object_def_files.files}}" changed_when: no @@ -34,6 +42,31 @@ file_content: "{{ item.content | b64decode | from_yaml }}" with_items: "{{ object_defs.results }}" +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^metrics-hawkular-openshift-agent.*.yaml" + use_regex: true + register: hawkular_agent_object_def_files + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- slurp: + src: "{{item.path}}" + register: hawkular_agent_object_defs + with_items: "{{ hawkular_agent_object_def_files.files }}" + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- name: Create Hawkular Agent objects + include: oc_apply.yaml + vars: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + namespace: "{{ openshift_metrics_hawkular_agent_namespace }}" + file_name: "{{ item.source }}" + file_content: "{{ item.content | b64decode | from_yaml }}" + with_items: "{{ hawkular_agent_object_defs.results }}" + when: openshift_metrics_install_hawkular_agent | bool + - include: update_master_config.yaml - command: > diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 5d8506a73..0b5f23c24 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -44,6 +44,9 @@ - include: "{{ (openshift_metrics_install_metrics | bool) | ternary('install_metrics.yaml','uninstall_metrics.yaml') }}" +- include: uninstall_hosa.yaml + when: not openshift_metrics_install_hawkular_agent | bool + - name: Delete temp directory local_action: file path=local_tmp.stdout state=absent tags: metrics_cleanup diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index dd67703b4..1e1af40e8 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -14,7 +14,7 @@ 
command: > {{ openshift.common.client_binary }} --config={{ kubeconfig }} apply -f {{ file_name }} - -n {{ openshift_metrics_project }} + -n {{namespace}} register: generation_apply failed_when: "'error' in generation_apply.stderr" changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_hosa.yaml b/roles/openshift_metrics/tasks/uninstall_hosa.yaml new file mode 100644 index 000000000..42ed02460 --- /dev/null +++ b/roles/openshift_metrics/tasks/uninstall_hosa.yaml @@ -0,0 +1,15 @@ +--- +- name: remove Hawkular Agent (HOSA) components + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found --selector=metrics-infra=agent + all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings + register: delete_metrics + changed_when: delete_metrics.stdout != 'No resources found' + +- name: remove rolebindings + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found + clusterrolebinding/hawkular-openshift-agent-rb + changed_when: delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 new file mode 100644 index 000000000..bf472c066 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 @@ -0,0 +1,54 @@ +id: hawkular-openshift-agent +kind: ConfigMap +apiVersion: v1 +name: Hawkular OpenShift Agent Configuration +metadata: + name: hawkular-openshift-agent-configuration + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +data: + config.yaml: | + kubernetes: + tenant: ${POD:namespace_name} + hawkular_server: + url: https://hawkular-metrics.openshift-infra.svc.cluster.local + credentials: + username: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.username + password: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.password + ca_cert_file: secret:openshift-infra/hawkular-metrics-certs/ca.crt + emitter: + status_enabled: false + collector: + minimum_collection_interval: 10s + default_collection_interval: 30s + metric_id_prefix: pod/${POD:uid}/custom/ + tags: + metric_name: ${METRIC:name} + description: ${METRIC:description} + units: ${METRIC:units} + namespace_id: ${POD:namespace_uid} + namespace_name: ${POD:namespace_name} + node_name: ${POD:node_name} + pod_id: ${POD:uid} + pod_ip: ${POD:ip} + pod_name: ${POD:name} + pod_namespace: ${POD:namespace_name} + hostname: ${POD:hostname} + host_ip: ${POD:host_ip} + labels: ${POD:labels} + type: pod + collector: hawkular_openshift_agent + custom_metric: true + hawkular-openshift-agent: | + endpoints: + - type: prometheus + protocol: "http" + port: 8080 + path: /metrics + collection_interval: 30s + metrics: + - name: hawkular_openshift_agent_metric_data_points_collected_total + - name: hawkular_openshift_agent_monitored_endpoints + - name: hawkular_openshift_agent_monitored_pods + - name: hawkular_openshift_agent_monitored_metrics diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 new file mode 100644 index 000000000..d65eaf9ae --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 @@ -0,0 +1,91 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet 
+metadata: + name: hawkular-openshift-agent + labels: + name: hawkular-openshift-agent + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +spec: + selector: + matchLabels: + name: hawkular-openshift-agent + template: + metadata: + labels: + name: hawkular-openshift-agent + metrics-infra: agent + spec: + serviceAccount: hawkular-openshift-agent +{% if node_selector is iterable and node_selector | length > 0 %} + nodeSelector: +{% for key, value in node_selector.iteritems() %} + {{key}}: "{{value}}" +{% endfor %} +{% endif %} + containers: + - image: {{openshift_metrics_image_prefix}}metrics-hawkular-openshift-agent:{{openshift_metrics_image_version}} + imagePullPolicy: Always + name: hawkular-openshift-agent +{% if ((openshift_metrics_hawkular_agent_limits_cpu is defined and openshift_metrics_hawkular_agent_limits_cpu is not none) + or (openshift_metrics_hawkular_agent_limits_memory is defined and openshift_metrics_hawkular_agent_limits_memory is not none) + or (openshift_metrics_hawkular_agent_requests_cpu is defined and openshift_metrics_hawkular_agent_requests_cpu is not none) + or (openshift_metrics_hawkular_agent_requests_memory is defined and openshift_metrics_hawkular_agent_requests_memory is not none)) +%} + resources: +{% if (openshift_metrics_hawkular_agent_limits_cpu is not none + or openshift_metrics_hawkular_agent_limits_memory is not none) +%} + limits: +{% if openshift_metrics_hawkular_agent_limits_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_limits_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_limits_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_limits_memory}}" +{% endif %} +{% endif %} +{% if (openshift_metrics_hawkular_agent_requests_cpu is not none + or openshift_metrics_hawkular_agent_requests_memory is not none) +%} + requests: +{% if openshift_metrics_hawkular_agent_requests_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_requests_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_requests_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_requests_memory}}" +{% endif %} +{% endif %} +{% endif %} + + livenessProbe: + httpGet: + scheme: HTTP + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 30 + command: + - "hawkular-openshift-agent" + - "-config" + - "/hawkular-openshift-agent-configuration/config.yaml" + - "-v" + - "3" + env: + - name: K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: hawkular-openshift-agent-configuration + mountPath: "/hawkular-openshift-agent-configuration" + volumes: + - name: hawkular-openshift-agent-configuration + configMap: + name: hawkular-openshift-agent-configuration + - name: hawkular-openshift-agent + configMap: + name: hawkular-openshift-agent-configuration diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 new file mode 100644 index 000000000..24b8cd801 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ClusterRole +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent +rules: +- apiGroups: + - "" + resources: + - configmaps + - namespaces + - nodes + - pods + - projects + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get diff 
--git a/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 new file mode 100644 index 000000000..ec604d73c --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} diff --git a/roles/openshift_node_dnsmasq/files/networkmanager/99-origin-dns.sh b/roles/openshift_node_dnsmasq/files/networkmanager/99-origin-dns.sh index 24798d3d2..c68073a10 100755 --- a/roles/openshift_node_dnsmasq/files/networkmanager/99-origin-dns.sh +++ b/roles/openshift_node_dnsmasq/files/networkmanager/99-origin-dns.sh @@ -96,6 +96,9 @@ EOF if ! grep -q '99-origin-dns.sh' ${NEW_RESOLV_CONF}; then echo "# nameserver updated by /etc/NetworkManager/dispatcher.d/99-origin-dns.sh" >> ${NEW_RESOLV_CONF} fi + if ! grep -q 'search.*cluster.local' ${NEW_RESOLV_CONF}; then + sed -i '/^search/ s/$/ cluster.local/' ${NEW_RESOLV_CONF} + fi cp -Z ${NEW_RESOLV_CONF} /etc/resolv.conf fi fi diff --git a/roles/openshift_node_upgrade/tasks/main.yml b/roles/openshift_node_upgrade/tasks/main.yml index d44839d69..8eaa68cc9 100644 --- a/roles/openshift_node_upgrade/tasks/main.yml +++ b/roles/openshift_node_upgrade/tasks/main.yml @@ -147,3 +147,6 @@ # Give the node two minutes to come back online. retries: 24 delay: 5 + +- include_role: + name: openshift_node_dnsmasq diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 023b1a9b7..8f8550e2d 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -4,7 +4,8 @@ path: /run/ostree-booted register: ostree_booted -- block: +- when: not ostree_booted.stat.exists + block: - name: Ensure libselinux-python is installed package: name=libselinux-python state=present @@ -24,41 +25,40 @@ - openshift_additional_repos | length == 0 notify: refresh cache - # Note: OpenShift repositories under CentOS may be shipped through the - # "centos-release-openshift-origin" package which configures the repository. - # This task matches the file names provided by the package so that they are - # not installed twice in different files and remains idempotent. - - name: Configure origin gpg keys if needed - copy: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - with_items: - - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS - dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS - - src: origin/repos/openshift-ansible-centos-paas-sig.repo - dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo - notify: refresh cache - when: - - ansible_os_family == "RedHat" - - ansible_distribution != "Fedora" - - openshift_deployment_type == 'origin' - - openshift_enable_origin_repo | default(true) | bool - # Singleton block - - when: r_osr_first_run | default(true) + - when: r_openshift_repos_has_run is not defined block: + + # Note: OpenShift repositories under CentOS may be shipped through the + # "centos-release-openshift-origin" package which configures the repository. + # This task matches the file names provided by the package so that they are + # not installed twice in different files and remains idempotent. 
+ - name: Configure origin gpg keys if needed + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS + dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS + - src: origin/repos/openshift-ansible-centos-paas-sig.repo + dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo + notify: refresh cache + when: + - ansible_os_family == "RedHat" + - ansible_distribution != "Fedora" + - openshift_deployment_type == 'origin' + - openshift_enable_origin_repo | default(true) | bool + - name: Ensure clean repo cache in the event repos have been changed manually debug: msg: "First run of openshift_repos" changed_when: true notify: refresh cache - - name: Set fact r_osr_first_run false + - name: Record that openshift_repos already ran set_fact: - r_osr_first_run: false + r_openshift_repos_has_run: True # Force running ALL handlers now, because we expect repo cache to be cleared # if changes have been made. - meta: flush_handlers - - when: not ostree_booted.stat.exists diff --git a/roles/openshift_storage_glusterfs/README.md b/roles/openshift_storage_glusterfs/README.md index 7b310dbf8..da4e348b4 100644 --- a/roles/openshift_storage_glusterfs/README.md +++ b/roles/openshift_storage_glusterfs/README.md @@ -1,7 +1,31 @@ OpenShift GlusterFS Cluster =========================== -OpenShift GlusterFS Cluster Installation +OpenShift GlusterFS Cluster Configuration + +This role handles the configuration of GlusterFS clusters. It can handle +two primary configuration scenarios: + +* Configuring a new, natively-hosted GlusterFS cluster. In this scenario, + GlusterFS pods are deployed on nodes in the OpenShift cluster which are + configured to provide storage. +* Configuring a new, external GlusterFS cluster. In this scenario, the + cluster nodes have the GlusterFS software pre-installed but have not + been configured yet. The installer will take care of configuring the + cluster(s) for use by OpenShift applications. +* Using existing GlusterFS clusters. In this scenario, one or more + GlusterFS clusters are assumed to be already setup. These clusters can + be either natively-hosted or external, but must be managed by a + [heketi service](https://github.com/heketi/heketi). + +As part of the configuration, a particular GlusterFS cluster may be +specified to provide backend storage for a natively-hosted Docker +registry. + +Unless configured otherwise, a StorageClass will be automatically +created for each non-registry GlusterFS cluster. This will allow +applications which can mount PersistentVolumes to request +dynamically-provisioned GlusterFS volumes. Requirements ------------ @@ -21,28 +45,53 @@ hosted Docker registry: * `[glusterfs_registry]` +Host Variables +-------------- + +For configuring new clusters, the following role variables are available. + +Each host in either of the above groups must have the following variable +defined: + +| Name | Default value | Description | +|-------------------|---------------|-----------------------------------------| +| glusterfs_devices | None | A list of block devices that will be completely managed as part of a GlusterFS cluster. There must be at least one device listed. Each device must be bare, e.g. no partitions or LVM PVs. 
**Example:** '[ "/dev/sdb" ]' + +In addition, each host may specify the following variables to further control +their configuration as GlusterFS nodes: + +| Name | Default value | Description | +|--------------------|---------------------------|-----------------------------------------| +| glusterfs_cluster | 1 | The ID of the cluster this node should belong to. This is useful when a single heketi service is expected to manage multiple distinct clusters. **NOTE:** For natively-hosted clusters, all pods will be in the same OpenShift namespace +| glusterfs_hostname | openshift.common.hostname | A hostname (or IP address) that will be used for internal GlusterFS communication +| glusterfs_ip | openshift.common.ip | An IP address that will be used by pods to communicate with the GlusterFS node +| glusterfs_zone | 1 | A zone number for the node. Zones are used within the cluster for determining how to distribute the bricks of GlusterFS volumes. heketi will try to spread each volumes' bricks as evenly as possible across all zones + Role Variables -------------- This role has the following variables that control the integration of a GlusterFS cluster into a new or existing OpenShift cluster: -| Name | Default value | | +| Name | Default value | Description | |--------------------------------------------------|-------------------------|-----------------------------------------| | openshift_storage_glusterfs_timeout | 300 | Seconds to wait for pods to become ready | openshift_storage_glusterfs_namespace | 'default' | Namespace in which to create GlusterFS resources | openshift_storage_glusterfs_is_native | True | GlusterFS should be containerized -| openshift_storage_glusterfs_nodeselector | 'storagenode=glusterfs' | Selector to determine which nodes will host GlusterFS pods in native mode +| openshift_storage_glusterfs_name | 'storage' | A name to identify the GlusterFS cluster, which will be used in resource names +| openshift_storage_glusterfs_nodeselector | 'glusterfs=storage-host'| Selector to determine which nodes will host GlusterFS pods in native mode. **NOTE:** The label value is taken from the cluster name +| openshift_storage_glusterfs_storageclass | True | Automatically create a StorageClass for each GlusterFS cluster | openshift_storage_glusterfs_image | 'gluster/gluster-centos'| Container image to use for GlusterFS pods, enterprise default is 'rhgs3/rhgs-server-rhel7' | openshift_storage_glusterfs_version | 'latest' | Container image version to use for GlusterFS pods | openshift_storage_glusterfs_wipe | False | Destroy any existing GlusterFS resources and wipe storage devices. 
**WARNING: THIS WILL DESTROY ANY DATA ON THOSE DEVICES.** | openshift_storage_glusterfs_heketi_is_native | True | heketi should be containerized | openshift_storage_glusterfs_heketi_image | 'heketi/heketi' | Container image to use for heketi pods, enterprise default is 'rhgs3/rhgs-volmanager-rhel7' | openshift_storage_glusterfs_heketi_version | 'latest' | Container image version to use for heketi pods -| openshift_storage_glusterfs_heketi_admin_key | '' | String to use as secret key for performing heketi commands as admin -| openshift_storage_glusterfs_heketi_user_key | '' | String to use as secret key for performing heketi commands as user that can only view or modify volumes +| openshift_storage_glusterfs_heketi_admin_key | auto-generated | String to use as secret key for performing heketi commands as admin +| openshift_storage_glusterfs_heketi_user_key | auto-generated | String to use as secret key for performing heketi commands as user that can only view or modify volumes | openshift_storage_glusterfs_heketi_topology_load | True | Load the GlusterFS topology information into heketi -| openshift_storage_glusterfs_heketi_url | Undefined | URL for the heketi REST API, dynamically determined in native mode +| openshift_storage_glusterfs_heketi_url | Undefined | When heketi is native, this sets the hostname portion of the final heketi route URL. When heketi is external, this is the full URL to the heketi service. +| openshift_storage_glusterfs_heketi_port | 8080 | TCP port for external heketi service **NOTE:** This has no effect in native mode | openshift_storage_glusterfs_heketi_wipe | False | Destroy any existing heketi resources, defaults to the value of `openshift_storage_glusterfs_wipe` Each role variable also has a corresponding variable to optionally configure a @@ -52,17 +101,24 @@ registry. These variables start with the prefix values in their corresponding non-registry variables. 
The following variables are an exception: -| Name | Default value | | -|---------------------------------------------------|-----------------------|-----------------------------------------| -| openshift_storage_glusterfs_registry_namespace | registry namespace | Default is to use the hosted registry's namespace, otherwise 'default' -| openshift_storage_glusterfs_registry_nodeselector | 'storagenode=registry'| This allows for the logical separation of the registry GlusterFS cluster from any regular-use GlusterFS clusters +| Name | Default value | Description | +|-------------------------------------------------------|-----------------------|-----------------------------------------| +| openshift_storage_glusterfs_registry_namespace | registry namespace | Default is to use the hosted registry's namespace, otherwise 'default' +| openshift_storage_glusterfs_registry_name | 'registry' | This allows for the logical separation of the registry GlusterFS cluster from other GlusterFS clusters +| openshift_storage_glusterfs_registry_storageclass | False | It is recommended to not create a StorageClass for GlusterFS clusters serving registry storage, so as to avoid performance penalties +| openshift_storage_glusterfs_registry_heketi_admin_key | auto-generated | Separate from the above +| openshift_storage_glusterfs_registry_heketi_user_key | auto-generated | Separate from the above Additionally, this role's behavior responds to the following registry-specific -variable: - -| Name | Default value | Description | -|----------------------------------------------|---------------|------------------------------------------------------------------------------| -| openshift_hosted_registry_glusterfs_swap | False | Whether to swap an existing registry's storage volume for a GlusterFS volume | +variables: + +| Name | Default value | Description | +|-----------------------------------------------|------------------------------|-----------------------------------------| +| openshift_hosted_registry_glusterfs_endpoints | glusterfs-registry-endpoints | The name for the Endpoints resource that will point the registry to the GlusterFS nodes +| openshift_hosted_registry_glusterfs_path | glusterfs-registry-volume | The name for the GlusterFS volume that will provide registry storage +| openshift_hosted_registry_glusterfs_readonly | False | Whether the GlusterFS volume should be read-only +| openshift_hosted_registry_glusterfs_swap | False | Whether to swap an existing registry's storage volume for a GlusterFS volume +| openshift_hosted_registry_glusterfs_swapcopy | True | If swapping, copy the contents of the pre-existing registry storage to the new GlusterFS volume Dependencies ------------ diff --git a/roles/openshift_storage_glusterfs/defaults/main.yml b/roles/openshift_storage_glusterfs/defaults/main.yml index ebe9ca30b..4ff56af9e 100644 --- a/roles/openshift_storage_glusterfs/defaults/main.yml +++ b/roles/openshift_storage_glusterfs/defaults/main.yml @@ -2,7 +2,9 @@ openshift_storage_glusterfs_timeout: 300 openshift_storage_glusterfs_namespace: 'default' openshift_storage_glusterfs_is_native: True -openshift_storage_glusterfs_nodeselector: 'storagenode=glusterfs' +openshift_storage_glusterfs_name: 'storage' +openshift_storage_glusterfs_nodeselector: "glusterfs={{ openshift_storage_glusterfs_name }}-host" +openshift_storage_glusterfs_storageclass: True openshift_storage_glusterfs_image: "{{ 'rhgs3/rhgs-server-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'gluster/gluster-centos' | quote }}" 
openshift_storage_glusterfs_version: 'latest' openshift_storage_glusterfs_wipe: False @@ -11,16 +13,19 @@ openshift_storage_glusterfs_heketi_is_missing: True openshift_storage_glusterfs_heketi_deploy_is_missing: True openshift_storage_glusterfs_heketi_image: "{{ 'rhgs3/rhgs-volmanager-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'heketi/heketi' | quote }}" openshift_storage_glusterfs_heketi_version: 'latest' -openshift_storage_glusterfs_heketi_admin_key: '' -openshift_storage_glusterfs_heketi_user_key: '' +openshift_storage_glusterfs_heketi_admin_key: "{{ omit }}" +openshift_storage_glusterfs_heketi_user_key: "{{ omit }}" openshift_storage_glusterfs_heketi_topology_load: True openshift_storage_glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_wipe }}" openshift_storage_glusterfs_heketi_url: "{{ omit }}" +openshift_storage_glusterfs_heketi_port: 8080 openshift_storage_glusterfs_registry_timeout: "{{ openshift_storage_glusterfs_timeout }}" openshift_storage_glusterfs_registry_namespace: "{{ openshift.hosted.registry.namespace | default('default') }}" openshift_storage_glusterfs_registry_is_native: "{{ openshift_storage_glusterfs_is_native }}" -openshift_storage_glusterfs_registry_nodeselector: 'storagenode=registry' +openshift_storage_glusterfs_registry_name: 'registry' +openshift_storage_glusterfs_registry_nodeselector: "glusterfs={{ openshift_storage_glusterfs_registry_name }}-host" +openshift_storage_glusterfs_registry_storageclass: False openshift_storage_glusterfs_registry_image: "{{ openshift_storage_glusterfs_image }}" openshift_storage_glusterfs_registry_version: "{{ openshift_storage_glusterfs_version }}" openshift_storage_glusterfs_registry_wipe: "{{ openshift_storage_glusterfs_wipe }}" @@ -29,8 +34,9 @@ openshift_storage_glusterfs_registry_heketi_is_missing: "{{ openshift_storage_gl openshift_storage_glusterfs_registry_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_heketi_deploy_is_missing }}" openshift_storage_glusterfs_registry_heketi_image: "{{ openshift_storage_glusterfs_heketi_image }}" openshift_storage_glusterfs_registry_heketi_version: "{{ openshift_storage_glusterfs_heketi_version }}" -openshift_storage_glusterfs_registry_heketi_admin_key: "{{ openshift_storage_glusterfs_heketi_admin_key }}" -openshift_storage_glusterfs_registry_heketi_user_key: "{{ openshift_storage_glusterfs_heketi_user_key }}" +openshift_storage_glusterfs_registry_heketi_admin_key: "{{ omit }}" +openshift_storage_glusterfs_registry_heketi_user_key: "{{ omit }}" openshift_storage_glusterfs_registry_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load }}" openshift_storage_glusterfs_registry_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe }}" openshift_storage_glusterfs_registry_heketi_url: "{{ openshift_storage_glusterfs_heketi_url | default(omit) }}" +openshift_storage_glusterfs_registry_heketi_port: 8080 diff --git a/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml index c9945be13..4434f750c 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml @@ -9,49 +9,47 @@ metadata: annotations: description: Bootstrap Heketi installation tags: glusterfs,heketi,installation -labels: - template: deploy-heketi objects: - kind: Service apiVersion: v1 metadata: - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} labels: - glusterfs: 
deploy-heketi-service + glusterfs: deploy-heketi-${CLUSTER_NAME}-service deploy-heketi: support annotations: description: Exposes Heketi service spec: ports: - - name: deploy-heketi + - name: deploy-heketi-${CLUSTER_NAME} port: 8080 targetPort: 8080 selector: - name: deploy-heketi + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod - kind: Route apiVersion: v1 metadata: - name: deploy-heketi + name: ${HEKETI_ROUTE} labels: - glusterfs: deploy-heketi-route + glusterfs: deploy-heketi-${CLUSTER_NAME}-route deploy-heketi: support spec: to: kind: Service - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} - kind: DeploymentConfig apiVersion: v1 metadata: - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} labels: - glusterfs: deploy-heketi-dc + glusterfs: deploy-heketi-${CLUSTER_NAME}-dc deploy-heketi: support annotations: description: Defines how to deploy Heketi spec: replicas: 1 selector: - name: deploy-heketi + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod triggers: - type: ConfigChange strategy: @@ -60,13 +58,12 @@ objects: metadata: name: deploy-heketi labels: - name: deploy-heketi - glusterfs: deploy-heketi-pod + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod deploy-heketi: support spec: - serviceAccountName: heketi-service-account + serviceAccountName: heketi-${CLUSTER_NAME}-service-account containers: - - name: deploy-heketi + - name: heketi image: ${IMAGE_NAME}:${IMAGE_VERSION} env: - name: HEKETI_USER_KEY @@ -81,11 +78,15 @@ objects: value: '14' - name: HEKETI_KUBE_GLUSTER_DAEMONSET value: '1' + - name: HEKETI_KUBE_NAMESPACE + value: ${HEKETI_KUBE_NAMESPACE} ports: - containerPort: 8080 volumeMounts: - name: db mountPath: /var/lib/heketi + - name: topology + mountPath: ${TOPOLOGY_PATH} readinessProbe: timeoutSeconds: 3 initialDelaySeconds: 3 @@ -100,6 +101,9 @@ objects: port: 8080 volumes: - name: db + - name: topology + secret: + secretName: heketi-${CLUSTER_NAME}-topology-secret parameters: - name: HEKETI_USER_KEY displayName: Heketi User Secret @@ -107,9 +111,24 @@ parameters: - name: HEKETI_ADMIN_KEY displayName: Heketi Administrator Secret description: Set secret for administration of the Heketi service as user _admin_ +- name: HEKETI_KUBE_NAMESPACE + displayName: Namespace + description: Set the namespace where the GlusterFS pods reside + value: default +- name: HEKETI_ROUTE + displayName: heketi route name + description: Set the hostname for the route URL + value: "heketi-glusterfs" - name: IMAGE_NAME - displayName: GlusterFS container name + displayName: heketi container image name required: True - name: IMAGE_VERSION - displayName: GlusterFS container versiona + displayName: heketi container image version + required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + description: A unique name to identify this heketi service, useful for running multiple heketi instances + value: glusterfs +- name: TOPOLOGY_PATH + displayName: heketi topology file location required: True diff --git a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml index c66705752..8c5e1ded3 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml @@ -12,24 +12,24 @@ objects: - kind: DaemonSet apiVersion: extensions/v1beta1 metadata: - name: glusterfs + name: glusterfs-${CLUSTER_NAME} labels: - glusterfs: daemonset + glusterfs: ${CLUSTER_NAME}-daemonset annotations: description: GlusterFS DaemonSet 
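The deploy-heketi template above now receives the topology file through a secret mounted into the pod, rather than relying on heketi-cli on the Ansible host. A rough rendering of the relevant pod spec fragment, assuming CLUSTER_NAME=storage and a hypothetical TOPOLOGY_PATH (in practice the role passes its mktemp directory):

```yaml
# Sketch of the processed deploy-heketi pod spec, assuming CLUSTER_NAME=storage
# and TOPOLOGY_PATH=/tmp/openshift-glusterfs-ansible (hypothetical mktemp path).
spec:
  serviceAccountName: heketi-storage-service-account
  containers:
  - name: heketi
    volumeMounts:
    - name: db
      mountPath: /var/lib/heketi
    - name: topology
      mountPath: /tmp/openshift-glusterfs-ansible    # ${TOPOLOGY_PATH}
  volumes:
  - name: db
  - name: topology
    secret:
      secretName: heketi-storage-topology-secret     # heketi-${CLUSTER_NAME}-topology-secret
```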
tags: glusterfs spec: selector: matchLabels: - glusterfs-node: pod + glusterfs: ${CLUSTER_NAME}-pod template: metadata: - name: glusterfs + name: glusterfs-${CLUSTER_NAME} labels: + glusterfs: ${CLUSTER_NAME}-pod glusterfs-node: pod spec: - nodeSelector: - storagenode: glusterfs + nodeSelector: "${{NODE_LABELS}}" hostNetwork: true containers: - name: glusterfs @@ -63,26 +63,26 @@ objects: privileged: true readinessProbe: timeoutSeconds: 3 - initialDelaySeconds: 100 + initialDelaySeconds: 40 exec: command: - "/bin/bash" - "-c" - systemctl status glusterd.service - periodSeconds: 10 + periodSeconds: 25 successThreshold: 1 - failureThreshold: 3 + failureThreshold: 15 livenessProbe: timeoutSeconds: 3 - initialDelaySeconds: 100 + initialDelaySeconds: 40 exec: command: - "/bin/bash" - "-c" - systemctl status glusterd.service - periodSeconds: 10 + periodSeconds: 25 successThreshold: 1 - failureThreshold: 3 + failureThreshold: 15 resources: {} terminationMessagePath: "/dev/termination-log" volumes: @@ -120,9 +120,17 @@ objects: dnsPolicy: ClusterFirst securityContext: {} parameters: +- name: NODE_LABELS + displayName: Daemonset Node Labels + description: Labels which define the daemonset node selector. Must contain at least one label of the format \'glusterfs=<CLUSTER_NAME>-host\' + value: '{ "glusterfs": "storage-host" }' - name: IMAGE_NAME - displayName: GlusterFS container name + displayName: GlusterFS container image name required: True - name: IMAGE_VERSION - displayName: GlusterFS container versiona + displayName: GlusterFS container image version required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + description: A unique name to identify which heketi service manages this cluster, useful for running multiple heketi instances + value: storage diff --git a/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml index df045c170..e3fa0a9fb 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml @@ -8,15 +8,13 @@ metadata: annotations: description: Heketi service deployment template tags: glusterfs,heketi -labels: - template: heketi objects: - kind: Service apiVersion: v1 metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-service + glusterfs: heketi-${CLUSTER_NAME}-service annotations: description: Exposes Heketi service spec: @@ -25,40 +23,40 @@ objects: port: 8080 targetPort: 8080 selector: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod - kind: Route apiVersion: v1 metadata: - name: heketi + name: ${HEKETI_ROUTE} labels: - glusterfs: heketi-route + glusterfs: heketi-${CLUSTER_NAME}-route spec: to: kind: Service - name: heketi + name: heketi-${CLUSTER_NAME} - kind: DeploymentConfig apiVersion: v1 metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-dc + glusterfs: heketi-${CLUSTER_NAME}-dc annotations: description: Defines how to deploy Heketi spec: replicas: 1 selector: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod triggers: - type: ConfigChange strategy: type: Recreate template: metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod spec: - serviceAccountName: heketi-service-account + serviceAccountName: heketi-${CLUSTER_NAME}-service-account containers: - name: heketi image: ${IMAGE_NAME}:${IMAGE_VERSION} @@ -76,6 +74,8 @@ objects: 
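In the glusterfs-template hunks just above, the hard-coded `storagenode: glusterfs` node selector becomes the NODE_LABELS template parameter, and the glusterd health checks are relaxed. A sketch of how the parameter is consumed, assuming its default value; `${{NODE_LABELS}}` is OpenShift's non-string parameter substitution, so the JSON value is injected as a map:

```yaml
# NODE_LABELS default: '{ "glusterfs": "storage-host" }'
# Rendered DaemonSet pod template (sketch):
spec:
  template:
    spec:
      nodeSelector:
        glusterfs: storage-host
      hostNetwork: true
# With the relaxed probes (initialDelaySeconds 40, periodSeconds 25,
# failureThreshold 15), a glusterd that never becomes healthy is only
# restarted after roughly 40 + 15*25 ≈ 415 seconds of failed liveness checks.
```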
value: '14' - name: HEKETI_KUBE_GLUSTER_DAEMONSET value: '1' + - name: HEKETI_KUBE_NAMESPACE + value: ${HEKETI_KUBE_NAMESPACE} ports: - containerPort: 8080 volumeMounts: @@ -96,7 +96,7 @@ objects: volumes: - name: db glusterfs: - endpoints: heketi-storage-endpoints + endpoints: heketi-db-${CLUSTER_NAME}-endpoints path: heketidbstorage parameters: - name: HEKETI_USER_KEY @@ -105,9 +105,21 @@ parameters: - name: HEKETI_ADMIN_KEY displayName: Heketi Administrator Secret description: Set secret for administration of the Heketi service as user _admin_ +- name: HEKETI_KUBE_NAMESPACE + displayName: Namespace + description: Set the namespace where the GlusterFS pods reside + value: default +- name: HEKETI_ROUTE + displayName: heketi route name + description: Set the hostname for the route URL + value: "heketi-glusterfs" - name: IMAGE_NAME - displayName: GlusterFS container name + displayName: heketi container image name required: True - name: IMAGE_VERSION - displayName: GlusterFS container versiona + displayName: heketi container image version required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + description: A unique name to identify this heketi service, useful for running multiple heketi instances + value: glusterfs diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml index fa5fa2cb0..4406ef28b 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml @@ -5,12 +5,6 @@ name: "{{ glusterfs_namespace }}" when: glusterfs_is_native or glusterfs_heketi_is_native -- include: glusterfs_deploy.yml - when: glusterfs_is_native - -- name: Make sure heketi-client is installed - package: name=heketi-client state=present - - name: Delete pre-existing heketi resources oc_obj: namespace: "{{ glusterfs_namespace }}" @@ -21,12 +15,18 @@ with_items: - kind: "template,route,service,dc,jobs,secret" selector: "deploy-heketi" - - kind: "template,route,service,dc" - name: "heketi" - - kind: "svc,ep" + - kind: "svc" name: "heketi-storage-endpoints" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-topology-secret" + - kind: "template,route,service,dc" + name: "heketi-{{ glusterfs_name }}" + - kind: "svc" + name: "heketi-db-{{ glusterfs_name }}-endpoints" - kind: "sa" - name: "heketi-service-account" + name: "heketi-{{ glusterfs_name }}-service-account" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-user-secret" failed_when: False when: glusterfs_heketi_wipe @@ -35,11 +35,11 @@ namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=deploy-heketi-pod" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: "heketi_pod.results.results[0]['items'] | count == 0" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" when: glusterfs_heketi_wipe - name: Wait for heketi pods to terminate @@ -47,23 +47,26 @@ namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: "heketi_pod.results.results[0]['items'] | count == 0" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" when: glusterfs_heketi_wipe +- include: glusterfs_deploy.yml + when: glusterfs_is_native + - name: Create heketi service account oc_serviceaccount: 
namespace: "{{ glusterfs_namespace }}" - name: heketi-service-account + name: "heketi-{{ glusterfs_name }}-service-account" state: present when: glusterfs_heketi_is_native - name: Add heketi service account to privileged SCC oc_adm_policy_user: - user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-service-account" + user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-{{ glusterfs_name }}-service-account" resource_kind: scc resource_name: privileged state: present @@ -71,7 +74,7 @@ - name: Allow heketi service account to view/edit pods oc_adm_policy_user: - user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-service-account" + user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-{{ glusterfs_name }}-service-account" resource_kind: role resource_name: edit state: present @@ -82,7 +85,7 @@ namespace: "{{ glusterfs_namespace }}" state: list kind: pod - selector: "glusterfs=deploy-heketi-pod,deploy-heketi=support" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod when: glusterfs_heketi_is_native @@ -100,7 +103,7 @@ namespace: "{{ glusterfs_namespace }}" state: list kind: pod - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod when: glusterfs_heketi_is_native @@ -113,48 +116,46 @@ # heketi is not missing when there are one or more pods with matching labels whose 'Ready' status is True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count > 0" -- include: heketi_deploy_part1.yml +- name: Generate topology file + template: + src: "{{ openshift.common.examples_content_version }}/topology.json.j2" + dest: "{{ mktemp.stdout }}/topology.json" when: - - glusterfs_heketi_is_native - - glusterfs_heketi_deploy_is_missing - - glusterfs_heketi_is_missing + - glusterfs_heketi_topology_load -- name: Determine heketi URL - oc_obj: - namespace: "{{ glusterfs_namespace }}" - state: list - kind: ep - selector: "glusterfs in (deploy-heketi-service, heketi-service)" - register: heketi_url - until: - - "heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip != ''" - - "heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port != ''" - delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" +- name: Generate heketi admin key + set_fact: + glusterfs_heketi_admin_key: "{{ 32 | oo_generate_secret }}" when: - glusterfs_heketi_is_native - - glusterfs_heketi_url is undefined + - glusterfs_heketi_admin_key is undefined -- name: Set heketi URL +- name: Generate heketi user key set_fact: - glusterfs_heketi_url: "{{ heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip }}:{{ heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port }}" + glusterfs_heketi_user_key: "{{ 32 | oo_generate_secret }}" + until: "glusterfs_heketi_user_key != glusterfs_heketi_admin_key" + delay: 1 + retries: 10 when: - glusterfs_heketi_is_native - - glusterfs_heketi_url is undefined + - glusterfs_heketi_user_key is undefined + +- include: heketi_deploy_part1.yml + when: + - glusterfs_heketi_is_native + - glusterfs_heketi_deploy_is_missing + - glusterfs_heketi_is_missing + +- name: Set heketi-cli command + set_fact: + glusterfs_heketi_client: "{% if glusterfs_heketi_is_native %}oc rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} {% endif %}heketi-cli -s http://{% if glusterfs_heketi_is_native %}localhost:8080{% else %}{{ 
glusterfs_heketi_url }}:{{ glusterfs_heketi_port }}{% endif %} --user admin --secret '{{ glusterfs_heketi_admin_key }}'" - name: Verify heketi service - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' cluster list" + command: "{{ glusterfs_heketi_client }} cluster list" changed_when: False -- name: Generate topology file - template: - src: "{{ openshift.common.examples_content_version }}/topology.json.j2" - dest: "{{ mktemp.stdout }}/topology.json" - when: - - glusterfs_heketi_topology_load - - name: Load heketi topology - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' topology load --json={{ mktemp.stdout }}/topology.json 2>&1" + command: "{{ glusterfs_heketi_client }} topology load --json={{ mktemp.stdout }}/topology.json 2>&1" register: topology_load failed_when: "topology_load.rc != 0 or 'Unable' in topology_load.stdout" when: @@ -164,3 +165,51 @@ when: - glusterfs_heketi_is_native - glusterfs_heketi_is_missing + +- name: Create heketi secret + oc_secret: + namespace: "{{ glusterfs_namespace }}" + state: present + name: "heketi-{{ glusterfs_name }}-secret" + type: "kubernetes.io/glusterfs" + force: True + contents: + - path: key + data: "{{ glusterfs_heketi_admin_key }}" + when: + - glusterfs_storageclass + +- name: Get heketi route + oc_obj: + namespace: "{{ glusterfs_namespace }}" + kind: route + state: list + name: "heketi-{{ glusterfs_name }}" + register: heketi_route + when: + - glusterfs_storageclass + - glusterfs_heketi_is_native + +- name: Determine StorageClass heketi URL + set_fact: + glusterfs_heketi_route: "{{ heketi_route.results.results[0]['spec']['host'] }}" + when: + - glusterfs_storageclass + - glusterfs_heketi_is_native + +- name: Generate GlusterFS StorageClass file + template: + src: "{{ openshift.common.examples_content_version }}/glusterfs-storageclass.yml.j2" + dest: "{{ mktemp.stdout }}/glusterfs-storageclass.yml" + when: + - glusterfs_storageclass + +- name: Create GlusterFS StorageClass + oc_obj: + state: present + kind: storageclass + name: "glusterfs-{{ glusterfs_name }}" + files: + - "{{ mktemp.stdout }}/glusterfs-storageclass.yml" + when: + - glusterfs_storageclass diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml index 451990240..dbfe126a4 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml @@ -3,7 +3,9 @@ glusterfs_timeout: "{{ openshift_storage_glusterfs_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_namespace }}" glusterfs_is_native: "{{ openshift_storage_glusterfs_is_native }}" - glusterfs_nodeselector: "{{ openshift_storage_glusterfs_nodeselector | map_from_pairs }}" + glusterfs_name: "{{ openshift_storage_glusterfs_name }}" + glusterfs_nodeselector: "{{ openshift_storage_glusterfs_nodeselector | default(['storagenode', openshift_storage_glusterfs_name] | join('=')) | map_from_pairs }}" + glusterfs_storageclass: "{{ openshift_storage_glusterfs_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_version }}" glusterfs_wipe: "{{ openshift_storage_glusterfs_wipe }}" @@ -17,6 +19,7 @@ glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load }}" glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe }}" 
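The glusterfs_common.yml changes above fold every raw heketi-cli invocation into a single `glusterfs_heketi_client` fact and auto-generate the heketi keys when they are left undefined. A minimal sketch of that pattern, using Ansible's built-in `password` lookup as a stand-in for the project's `oo_generate_secret` filter; the pod name and host in the comments are placeholders:

```yaml
- name: Generate heketi admin key (sketch; stand-in for the oo_generate_secret filter)
  set_fact:
    glusterfs_heketi_admin_key: "{{ lookup('password', '/dev/null length=32 chars=ascii_letters,digits') }}"
  when: glusterfs_heketi_admin_key is undefined

# The client fact then renders roughly as:
#   native heketi:   oc rsh deploy-heketi-storage-1-abcd1 heketi-cli -s http://localhost:8080 --user admin --secret '<admin key>'
#   external heketi: heketi-cli -s http://heketi.example.com:8080 --user admin --secret '<admin key>'
- name: Verify heketi service
  command: "{{ glusterfs_heketi_client }} cluster list"
  changed_when: False
```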
glusterfs_heketi_url: "{{ openshift_storage_glusterfs_heketi_url }}" - glusterfs_nodes: "{{ g_glusterfs_hosts }}" + glusterfs_heketi_port: "{{ openshift_storage_glusterfs_heketi_port }}" + glusterfs_nodes: "{{ groups.glusterfs }}" - include: glusterfs_common.yml diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml index 579112349..ea4dcc510 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml @@ -1,23 +1,24 @@ --- - assert: - that: "glusterfs_nodeselector.keys() | count == 1" - msg: Only one GlusterFS nodeselector key pair should be provided - -- assert: that: "glusterfs_nodes | count >= 3" msg: There must be at least three GlusterFS nodes specified - name: Delete pre-existing GlusterFS resources oc_obj: namespace: "{{ glusterfs_namespace }}" - kind: "template,daemonset" - name: glusterfs + kind: "{{ item.kind }}" + name: "{{ item.name }}" state: absent + with_items: + - kind: template + name: glusterfs + - kind: daemonset + name: "glusterfs-{{ glusterfs_name }}" when: glusterfs_wipe - name: Unlabel any existing GlusterFS nodes oc_label: - name: "{{ item }}" + name: "{{ hostvars[item].openshift.common.hostname }}" kind: node state: absent labels: "{{ glusterfs_nodeselector | oo_dict_to_list_of_dict }}" @@ -40,11 +41,16 @@ failed_when: False when: glusterfs_wipe - # Runs "vgremove -fy <vg>; pvremove -fy <pv>" for every device found to be a physical volume. + # Runs "lvremove -ff <vg>; vgremove -fy <vg>; pvremove -fy <pv>" for every device found to be a physical volume. - name: Clear GlusterFS storage device contents - shell: "{% for line in item.stdout_lines %}{% set fields = line.split() %}{% if fields | count > 1 %}vgremove -fy {{ fields[1] }}; {% endif %}pvremove -fy {{ fields[0] }}; {% endfor %}" + shell: "{% for line in item.stdout_lines %}{% set fields = line.split() %}{% if fields | count > 1 %}lvremove -ff {{ fields[1] }}; vgremove -fy {{ fields[1] }}; {% endif %}pvremove -fy {{ fields[0] }}; {% endfor %}" delegate_to: "{{ item.item }}" with_items: "{{ devices_info.results }}" + register: clear_devices + until: + - "'contains a filesystem in use' not in clear_devices.stderr" + delay: 1 + retries: 30 when: - glusterfs_wipe - item.stdout_lines | count > 0 @@ -61,13 +67,11 @@ - name: Label GlusterFS nodes oc_label: - name: "{{ glusterfs_host }}" + name: "{{ hostvars[item].openshift.common.hostname }}" kind: node state: add labels: "{{ glusterfs_nodeselector | oo_dict_to_list_of_dict }}" with_items: "{{ glusterfs_nodes | default([]) }}" - loop_control: - loop_var: glusterfs_host - name: Copy GlusterFS DaemonSet template copy: @@ -78,7 +82,7 @@ oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template - name: glusterfs + name: "glusterfs" state: present files: - "{{ mktemp.stdout }}/glusterfs-template.yml" @@ -91,17 +95,19 @@ params: IMAGE_NAME: "{{ glusterfs_image }}" IMAGE_VERSION: "{{ glusterfs_version }}" + NODE_LABELS: "{{ glusterfs_nodeselector }}" + CLUSTER_NAME: "{{ glusterfs_name }}" - name: Wait for GlusterFS pods oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs-node=pod" + selector: "glusterfs={{ glusterfs_name }}-pod" register: glusterfs_pods until: - "glusterfs_pods.results.results[0]['items'] | count > 0" # There must be as many pods with 'Ready' staus True as there are nodes expecting those pods - "glusterfs_pods.results.results[0]['items'] | 
oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count == glusterfs_nodes | count" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml index 392f4b65b..0849f2a2e 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml @@ -3,7 +3,9 @@ glusterfs_timeout: "{{ openshift_storage_glusterfs_registry_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_registry_namespace }}" glusterfs_is_native: "{{ openshift_storage_glusterfs_registry_is_native }}" - glusterfs_nodeselector: "{{ openshift_storage_glusterfs_registry_nodeselector | map_from_pairs }}" + glusterfs_name: "{{ openshift_storage_glusterfs_registry_name }}" + glusterfs_nodeselector: "{{ openshift_storage_glusterfs_registry_nodeselector | default(['storagenode', openshift_storage_glusterfs_registry_name] | join('=')) | map_from_pairs }}" + glusterfs_storageclass: "{{ openshift_storage_glusterfs_registry_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_registry_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_registry_version }}" glusterfs_wipe: "{{ openshift_storage_glusterfs_registry_wipe }}" @@ -17,21 +19,23 @@ glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_registry_heketi_topology_load }}" glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_registry_heketi_wipe }}" glusterfs_heketi_url: "{{ openshift_storage_glusterfs_registry_heketi_url }}" - glusterfs_nodes: "{{ g_glusterfs_registry_hosts }}" + glusterfs_heketi_port: "{{ openshift_storage_glusterfs_registry_heketi_port }}" + glusterfs_nodes: "{{ groups.glusterfs_registry | default(groups.glusterfs) }}" - include: glusterfs_common.yml - when: g_glusterfs_registry_hosts != g_glusterfs_hosts + when: + - glusterfs_nodes | default([]) | count > 0 + - "'glusterfs' not in groups or glusterfs_nodes != groups.glusterfs" - name: Delete pre-existing GlusterFS registry resources oc_obj: namespace: "{{ glusterfs_namespace }}" kind: "{{ item.kind }}" - name: "{{ item.name | default(omit) }}" - selector: "{{ item.selector | default(omit) }}" + name: "{{ item.name }}" state: absent with_items: - - kind: "svc,ep" - name: "glusterfs-registry-endpoints" + - kind: "svc" + name: "glusterfs-{{ glusterfs_name }}-endpoints" failed_when: False - name: Generate GlusterFS registry endpoints @@ -40,8 +44,8 @@ dest: "{{ mktemp.stdout }}/glusterfs-registry-endpoints.yml" - name: Copy GlusterFS registry service - copy: - src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-service.yml" + template: + src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-service.yml.j2" dest: "{{ mktemp.stdout }}/glusterfs-registry-service.yml" - name: Create GlusterFS registry endpoints @@ -49,7 +53,7 @@ namespace: "{{ glusterfs_namespace }}" state: present kind: endpoints - name: glusterfs-registry-endpoints + name: "glusterfs-{{ glusterfs_name }}-endpoints" files: - "{{ mktemp.stdout }}/glusterfs-registry-endpoints.yml" @@ -58,14 +62,14 @@ namespace: "{{ glusterfs_namespace }}" state: present kind: service - name: glusterfs-registry-endpoints + name: "glusterfs-{{ glusterfs_name }}-endpoints" files: - "{{ mktemp.stdout }}/glusterfs-registry-service.yml" - name: 
Check if GlusterFS registry volume exists - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' volume list" + command: "{{ glusterfs_heketi_client }} volume list" register: registry_volume - name: Create GlusterFS registry volume - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' volume create --size={{ openshift.hosted.registry.storage.volume.size | replace('Gi','') }} --name={{ openshift.hosted.registry.storage.glusterfs.path }}" + command: "{{ glusterfs_heketi_client }} volume create --size={{ openshift.hosted.registry.storage.volume.size | replace('Gi','') }} --name={{ openshift.hosted.registry.storage.glusterfs.path }}" when: "openshift.hosted.registry.storage.glusterfs.path not in registry_volume.stdout" diff --git a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml index c14fcfb15..ea9b1fe1f 100644 --- a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml +++ b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml @@ -6,11 +6,21 @@ with_items: - "deploy-heketi-template.yml" -- name: Create deploy-heketi resources +- name: Create heketi topology secret + oc_secret: + namespace: "{{ glusterfs_namespace }}" + state: present + name: "heketi-{{ glusterfs_name }}-topology-secret" + force: True + files: + - name: topology.json + path: "{{ mktemp.stdout }}/topology.json" + +- name: Create deploy-heketi template oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template - name: deploy-heketi + name: "deploy-heketi" state: present files: - "{{ mktemp.stdout }}/deploy-heketi-template.yml" @@ -23,19 +33,23 @@ params: IMAGE_NAME: "{{ glusterfs_heketi_image }}" IMAGE_VERSION: "{{ glusterfs_heketi_version }}" + HEKETI_ROUTE: "{{ glusterfs_heketi_url | default(['heketi-',glusterfs_name]|join) }}" HEKETI_USER_KEY: "{{ glusterfs_heketi_user_key }}" HEKETI_ADMIN_KEY: "{{ glusterfs_heketi_admin_key }}" + HEKETI_KUBE_NAMESPACE: "{{ glusterfs_namespace }}" + CLUSTER_NAME: "{{ glusterfs_name }}" + TOPOLOGY_PATH: "{{ mktemp.stdout }}" - name: Wait for deploy-heketi pod oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=deploy-heketi-pod,deploy-heketi=support" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: - "heketi_pod.results.results[0]['items'] | count > 0" # Pod's 'Ready' status must be True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" diff --git a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml index 64410a9ab..26343b909 100644 --- a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml +++ b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml @@ -1,8 +1,10 @@ --- - name: Create heketi DB volume - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' setup-openshift-heketi-storage --listfile {{ mktemp.stdout }}/heketi-storage.json" + command: "{{ glusterfs_heketi_client }} setup-openshift-heketi-storage --listfile /tmp/heketi-storage.json" register: setup_storage - 
failed_when: False + +- name: Copy heketi-storage list + shell: "{{ openshift.common.client_binary }} rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} cat /tmp/heketi-storage.json > {{ mktemp.stdout }}/heketi-storage.json" # This is used in the subsequent task - name: Copy the admin client config @@ -28,7 +30,7 @@ # Pod's 'Complete' status must be True - "heketi_job.results.results | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Complete'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" failed_when: - "'results' in heketi_job.results" - "heketi_job.results.results | count > 0" @@ -46,14 +48,45 @@ with_items: - kind: "template,route,service,jobs,dc,secret" selector: "deploy-heketi" - failed_when: False + - kind: "svc" + name: "heketi-storage-endpoints" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-topology-secret" + +- name: Generate heketi endpoints + template: + src: "{{ openshift.common.examples_content_version }}/heketi-endpoints.yml.j2" + dest: "{{ mktemp.stdout }}/heketi-endpoints.yml" + +- name: Generate heketi service + template: + src: "{{ openshift.common.examples_content_version }}/heketi-service.yml.j2" + dest: "{{ mktemp.stdout }}/heketi-service.yml" + +- name: Create heketi endpoints + oc_obj: + namespace: "{{ glusterfs_namespace }}" + state: present + kind: endpoints + name: "heketi-db-{{ glusterfs_name }}-endpoints" + files: + - "{{ mktemp.stdout }}/heketi-endpoints.yml" + +- name: Create heketi service + oc_obj: + namespace: "{{ glusterfs_namespace }}" + state: present + kind: service + name: "heketi-db-{{ glusterfs_name }}-endpoints" + files: + - "{{ mktemp.stdout }}/heketi-service.yml" - name: Copy heketi template copy: src: "{{ openshift.common.examples_content_version }}/heketi-template.yml" dest: "{{ mktemp.stdout }}/heketi-template.yml" -- name: Create heketi resources +- name: Create heketi template oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template @@ -70,40 +103,30 @@ params: IMAGE_NAME: "{{ glusterfs_heketi_image }}" IMAGE_VERSION: "{{ glusterfs_heketi_version }}" + HEKETI_ROUTE: "{{ glusterfs_heketi_url | default(['heketi-',glusterfs_name]|join) }}" HEKETI_USER_KEY: "{{ glusterfs_heketi_user_key }}" HEKETI_ADMIN_KEY: "{{ glusterfs_heketi_admin_key }}" + HEKETI_KUBE_NAMESPACE: "{{ glusterfs_namespace }}" + CLUSTER_NAME: "{{ glusterfs_name }}" - name: Wait for heketi pod oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: - "heketi_pod.results.results[0]['items'] | count > 0" # Pod's 'Ready' status must be True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" - -- name: Determine heketi URL - oc_obj: - namespace: "{{ glusterfs_namespace }}" - state: list - kind: ep - selector: "glusterfs=heketi-service" - register: heketi_url - until: - - "heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip != ''" - - "heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port != ''" - delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" -- name: Set heketi URL +- 
name: Set heketi-cli command set_fact: - glusterfs_heketi_url: "{{ heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip }}:{{ heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port }}" + glusterfs_heketi_client: "oc rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} heketi-cli -s http://localhost:8080 --user admin --secret '{{ glusterfs_heketi_admin_key }}'" - name: Verify heketi service - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' cluster list" + command: "{{ glusterfs_heketi_client }} cluster list" changed_when: False diff --git a/roles/openshift_storage_glusterfs/tasks/main.yml b/roles/openshift_storage_glusterfs/tasks/main.yml index ebd8db453..d2d8c6c10 100644 --- a/roles/openshift_storage_glusterfs/tasks/main.yml +++ b/roles/openshift_storage_glusterfs/tasks/main.yml @@ -7,12 +7,11 @@ - include: glusterfs_config.yml when: - - g_glusterfs_hosts | default([]) | count > 0 + - groups.glusterfs | default([]) | count > 0 - include: glusterfs_registry.yml when: - - g_glusterfs_registry_hosts | default([]) | count > 0 - - "openshift.hosted.registry.storage.kind == 'glusterfs' or openshift.hosted.registry.glusterfs.swap" + - "groups.glusterfs_registry | default([]) | count > 0 or openshift.hosted.registry.storage.kind == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap" - name: Delete temp directory file: diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 index 605627ab5..11c9195bb 100644 --- a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 @@ -1,7 +1,8 @@ +--- apiVersion: v1 kind: Endpoints metadata: - name: glusterfs-registry-endpoints + name: glusterfs-{{ glusterfs_name }}-endpoints subsets: - addresses: {% for node in glusterfs_nodes %} diff --git a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-registry-service.yml b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-service.yml.j2 index 3f8d8f507..3f869d2b7 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-registry-service.yml +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-service.yml.j2 @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: - name: glusterfs-registry-endpoints + name: glusterfs-{{ glusterfs_name }}-endpoints spec: ports: - port: 1 diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 new file mode 100644 index 000000000..5ea801e60 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 @@ -0,0 +1,11 @@ +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: glusterfs-{{ glusterfs_name }} +provisioner: kubernetes.io/glusterfs +parameters: + resturl: "http://{% if glusterfs_heketi_is_native %}{{ glusterfs_heketi_route }}{% else %}{{ glusterfs_heketi_url }}:{{ glusterfs_heketi_port }}{% endif %}" + restuser: "admin" + secretNamespace: "{{ glusterfs_namespace }}" + secretName: "heketi-{{ glusterfs_name }}-secret" diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 new file mode 
100644 index 000000000..99cbdf748 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Endpoints +metadata: + name: heketi-db-{{ glusterfs_name }}-endpoints +subsets: +- addresses: +{% for node in glusterfs_nodes %} + - ip: {{ hostvars[node].glusterfs_ip | default(hostvars[node].openshift.common.ip) }} +{% endfor %} + ports: + - port: 1 diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 new file mode 100644 index 000000000..dcb896441 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: heketi-db-{{ glusterfs_name }}-endpoints +spec: + ports: + - port: 1 +status: + loadBalancer: {} |
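Taken together, the new StorageClass template and the heketi admin-key secret give the non-registry cluster dynamic provisioning out of the box. A sketch of the rendered objects, assuming glusterfs_name=storage, native heketi, the 'default' namespace, and a hypothetical route host:

```yaml
---
# Rendered glusterfs-storageclass.yml (sketch; the route host is an assumption)
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: glusterfs-storage
provisioner: kubernetes.io/glusterfs
parameters:
  resturl: "http://heketi-storage-default.router.example.com"
  restuser: "admin"
  secretNamespace: "default"
  secretName: "heketi-storage-secret"
---
# Example consumer: a PVC that names the class gets a heketi-provisioned GlusterFS volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-claim
spec:
  storageClassName: glusterfs-storage
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
```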