diff options
122 files changed, 3270 insertions, 369 deletions
diff --git a/.redhat-ci.inventory b/.papr.inventory index 23bc9923c..23bc9923c 100644 --- a/.redhat-ci.inventory +++ b/.papr.inventory diff --git a/.redhat-ci.sh b/.papr.sh index fce8c1d52..decca625f 100755 --- a/.redhat-ci.sh +++ b/.papr.sh @@ -1,10 +1,12 @@ #!/bin/bash set -xeuo pipefail +echo "Targeting OpenShift Origin $OPENSHIFT_IMAGE_TAG" + pip install -r requirements.txt # ping the nodes to check they're responding and register their ostree versions -ansible -vvv -i .redhat-ci.inventory nodes -a 'rpm-ostree status' +ansible -vvv -i .papr.inventory nodes -a 'rpm-ostree status' upload_journals() { mkdir journals @@ -16,7 +18,9 @@ upload_journals() { trap upload_journals ERR # run the actual installer -ansible-playbook -vvv -i .redhat-ci.inventory playbooks/byo/config.yml +# FIXME: override openshift_image_tag defined in the inventory until +# https://github.com/openshift/openshift-ansible/issues/4478 is fixed. +ansible-playbook -vvv -i .papr.inventory playbooks/byo/config.yml -e "openshift_image_tag=$OPENSHIFT_IMAGE_TAG" # run a small subset of origin conformance tests to sanity # check the cluster NB: we run it on the master since we may diff --git a/.papr.yml b/.papr.yml new file mode 100644 index 000000000..16d6e78b1 --- /dev/null +++ b/.papr.yml @@ -0,0 +1,42 @@ +--- + +# This YAML file is used by PAPR. It details the test +# environment to provision and the test procedure. For more +# information on PAPR, see: +# +# https://github.com/projectatomic/papr +# +# The PAPR YAML specification detailing allowed fields can +# be found at: +# +# https://github.com/projectatomic/papr/blob/master/sample.papr.yml + +cluster: + hosts: + - name: ocp-master + distro: fedora/25/atomic + - name: ocp-node1 + distro: fedora/25/atomic + - name: ocp-node2 + distro: fedora/25/atomic + container: + image: fedora:25 + +packages: + - gcc + - python-pip + - python-devel + - libffi-devel + - openssl-devel + - redhat-rpm-config + +context: 'fedora/25/atomic' + +env: + OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 + +tests: + - ./.papr.sh + +artifacts: + - journals/ diff --git a/.redhat-ci.yml b/.redhat-ci.yml deleted file mode 100644 index 6dac7b256..000000000 --- a/.redhat-ci.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -cluster: - hosts: - - name: ocp-master - distro: fedora/25/atomic - - name: ocp-node1 - distro: fedora/25/atomic - - name: ocp-node2 - distro: fedora/25/atomic - container: - image: fedora:25 - -packages: - - gcc - - python-pip - - python-devel - - openssl-devel - - redhat-rpm-config - -context: 'fedora/25/atomic | origin/v3.6.0-alpha.1' - -env: - OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 - -tests: - - ./.redhat-ci.sh - -artifacts: - - journals/ diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index 2f89bc67f..98e277c19 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.6.109-1 ./ +3.6.117-1 ./ diff --git a/docs/pull_requests.md b/docs/pull_requests.md index fcc3e275c..45ae01a9d 100644 --- a/docs/pull_requests.md +++ b/docs/pull_requests.md @@ -10,8 +10,8 @@ Whenever a [Pull Request is opened](../CONTRIBUTING.md#submitting-contributions), some automated test jobs must be successfully run before the PR can be merged. -Some of these jobs are automatically triggered, e.g., Travis and Coveralls. -Other jobs need to be manually triggered by a member of the +Some of these jobs are automatically triggered, e.g., Travis, PAPR, and +Coveralls. Other jobs need to be manually triggered by a member of the [Team OpenShift Ansible Contributors](https://github.com/orgs/openshift/teams/team-openshift-ansible-contributors). ## Triggering tests @@ -48,9 +48,9 @@ simplifying the workflow towards a single infrastructure in the future. There are a set of tests that run on Fedora infrastructure. They are started automatically with every pull request. -They are implemented using the [`redhat-ci` framework](https://github.com/jlebon/redhat-ci). +They are implemented using the [`PAPR` framework](https://github.com/projectatomic/papr). -To re-run tests, write a comment containing `bot, retest this please`. +To re-run tests, write a comment containing only `bot, retest this please`. ## Triggering merge diff --git a/docs/repo_structure.md b/docs/repo_structure.md index 693837fba..f598f22c3 100644 --- a/docs/repo_structure.md +++ b/docs/repo_structure.md @@ -52,3 +52,16 @@ These are plugins used in playbooks and roles: . └── test Contains tests. ``` + +### CI + +These files are used by [PAPR](https://github.com/projectatomic/papr), +It is very similar in workflow to Travis, with the test +environment and test scripts defined in a YAML file. + +``` +. +├── .papr.yml +├── .papr.sh +└── .papr.inventory +``` diff --git a/images/installer/system-container/README.md b/images/installer/system-container/README.md index dc95307e5..fbcd47c4a 100644 --- a/images/installer/system-container/README.md +++ b/images/installer/system-container/README.md @@ -11,3 +11,21 @@ These files are needed to run the installer using an [Atomic System container](h * service.template - Template file for the systemd service. * tmpfiles.template - Template file for systemd-tmpfiles. + +## Options + +These options may be set via the ``atomic`` ``--set`` flag. For defaults see ``root/exports/manifest.json`` + +* OPTS - Additional options to pass to ansible when running the installer + +* VAR_LIB_OPENSHIFT_INSTALLER - Full path of the installer code to mount into the container + +* VAR_LOG_OPENSHIFT_LOG - Full path of the log file to mount into the container + +* PLAYBOOK_FILE - Full path of the playbook inside the container + +* HOME_ROOT - Full path on host to mount as the root home directory inside the container (for .ssh/, etc..) + +* ANSIBLE_CONFIG - Full path for the ansible configuration file to use inside the container + +* INVENTORY_FILE - Full path for the inventory to use from the host diff --git a/images/installer/system-container/root/exports/config.json.template b/images/installer/system-container/root/exports/config.json.template index 397ac941a..739c0080f 100644 --- a/images/installer/system-container/root/exports/config.json.template +++ b/images/installer/system-container/root/exports/config.json.template @@ -21,7 +21,8 @@ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm", "OPTS=$OPTS", - "PLAYBOOK_FILE=$PLAYBOOK_FILE" + "PLAYBOOK_FILE=$PLAYBOOK_FILE", + "ANSIBLE_CONFIG=$ANSIBLE_CONFIG" ], "cwd": "/opt/app-root/src/", "rlimits": [ diff --git a/images/installer/system-container/root/exports/manifest.json b/images/installer/system-container/root/exports/manifest.json index f735494d4..321a84ee8 100644 --- a/images/installer/system-container/root/exports/manifest.json +++ b/images/installer/system-container/root/exports/manifest.json @@ -6,6 +6,7 @@ "VAR_LOG_OPENSHIFT_LOG": "/var/log/ansible.log", "PLAYBOOK_FILE": "/usr/share/ansible/openshift-ansible/playbooks/byo/config.yml", "HOME_ROOT": "/root", + "ANSIBLE_CONFIG": "/usr/share/ansible/openshift-ansible/ansible.cfg", "INVENTORY_FILE": "/dev/null" } } diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 86b4de4b7..962a01a91 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -191,6 +191,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # or #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. +#openshift_cfme_install_app=False + # Cloud Provider Configuration # # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Enable API service auditing, available as of 1.3 #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. +# The directory in "auditFilePath" will be created if it's not +# exist +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5}} # Enable origin repos that point at Centos PAAS SIG, defaults to true, only used # by deployment_type=origin diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index cbaf22810..63f1f00d2 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -190,6 +190,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # or #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. +#openshift_cfme_install_app=False + # Cloud Provider Configuration # # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Enable API service auditing, available as of 3.2 #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. +# The directory in "auditFilePath" will be created if it's not +# exist +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5}} # Validity of the auto-generated OpenShift certificates in days. # See also openshift_hosted_registry_cert_expire_days above. diff --git a/openshift-ansible.spec b/openshift-ansible.spec index ec7429291..7b5587294 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -9,7 +9,7 @@ %global __requires_exclude ^/usr/bin/ansible-playbook$ Name: openshift-ansible -Version: 3.6.109 +Version: 3.6.117 Release: 1%{?dist} Summary: Openshift and Atomic Enterprise Ansible License: ASL 2.0 @@ -280,6 +280,84 @@ Atomic OpenShift Utilities includes %changelog +* Mon Jun 19 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.117-1 +- Run storage upgrade pre and post master upgrade (rteague@redhat.com) +- Introduce etcd migrate role (jchaloup@redhat.com) +- Add support for rhel, aci, vxlan (srampal@cisco.com) + +* Sun Jun 18 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.116-1 +- PAPR: define openshift_image_tag via command line (rhcarvalho@gmail.com) +- Ensure only one ES pod per PV (peter.portante@redhat.com) +- etcd v3 for clean installs (sdodson@redhat.com) +- Rename cockpit-shell -> cockpit-system (rhcarvalho@gmail.com) +- Update image repo name, images have been moved from 'cloudforms' to + 'cloudforms42' for CF 4.2. (simaishi@redhat.com) +- Update image repo name, images have been moved from 'cloudforms' to + 'cloudforms45' for CF 4.5. (simaishi@redhat.com) +- CloudForms 4.5 templates (simaishi@redhat.com) + +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.114-1 +- + +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.113-1 +- Make rollout status check best-effort, add poll (skuznets@redhat.com) +- Verify the rollout status of the hosted router and registry + (skuznets@redhat.com) +- fix es routes for new logging roles (rmeggins@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.112-1 +- Add the the other featured audit-config paramters as example (al- + git001@none.at) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.111-1 +- doc: Info for system container installer options (smilner@redhat.com) +- Add ANSIBLE_CONFIG to system container installer (smilner@redhat.com) +- Add missing file. Remove debugging prompt. (tbielawa@redhat.com) +- Update readme one last time (tbielawa@redhat.com) +- Reconfigure masters in serial to avoid HA meltdowns (tbielawa@redhat.com) +- First POC of a CFME turnkey solution in openshift-anisble + (tbielawa@redhat.com) +- Reverted most of this pr 4356 except: adding + openshift_logging_fluentd_buffer_queue_limit: 1024 + openshift_logging_fluentd_buffer_size_limit: 1m + openshift_logging_mux_buffer_queue_limit: 1024 + openshift_logging_mux_buffer_size_limit: 1m and setting the matched + environment variables. (nhosoi@redhat.com) +- Adding the defaults for openshift_logging_fluentd_{cpu,memory}_limit to + roles/openshift_logging_fluentd/defaults/main.yml. (nhosoi@redhat.com) +- Adding environment variables FLUENTD_CPU_LIMIT, FLUENTD_MEMORY_LIMIT, + MUX_CPU_LIMIT, MUX_MEMORY_LIMIT. (nhosoi@redhat.com) +- Introducing fluentd/mux buffer_queue_limit, buffer_size_limit, cpu_limit, and + memory_limit. (nhosoi@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.110-1 +- papr: add documentation to YAML and simplify context (jlebon@redhat.com) +- docs: better documentation for PAPR (jlebon@redhat.com) +- papr: install libffi-devel (jlebon@redhat.com) +- pre-install checks: add more during byo install (lmeyer@redhat.com) +- move etcd backup to etcd_common role (jchaloup@redhat.com) +- Support installing HOSA via ansible (mwringe@redhat.com) +- GlusterFS: Remove requirement for heketi-cli (jarrpa@redhat.com) +- GlusterFS: Fix bugs in wipe (jarrpa@redhat.com) +- GlusterFS: Skip heketi-cli install on Atomic (jarrpa@redhat.com) +- GlusterFS: Create a StorageClass if specified (jarrpa@redhat.com) +- GlusterFS: Use proper secrets (jarrpa@redhat.com) +- GlusterFS: Allow cleaner separation of multiple clusters (jarrpa@redhat.com) +- GlusterFS: Minor corrections and cleanups (jarrpa@redhat.com) +- GlusterFS: Improve documentation (jarrpa@redhat.com) +- GlusterFS: Allow configuration of kube namespace for heketi + (jarrpa@redhat.com) +- GlusterFS: Adjust when clauses for registry config (jarrpa@redhat.com) +- GlusterFS: Allow failure reporting when deleting deploy-heketi + (jarrpa@redhat.com) +- GlusterFS: Tweak pod probe parameters (jarrpa@redhat.com) +- GlusterFS: Allow for configuration of node selector (jarrpa@redhat.com) +- GlusterFS: Label on Openshift node name (jarrpa@redhat.com) +- GlusterFS: Make sure timeout is an int (jarrpa@redhat.com) +- GlusterFS: Use groups variables (jarrpa@redhat.com) +- papr: rename redhat-ci related files to papr (jlebon@redhat.com) +- singletonize some role tasks that repeat a lot (lmeyer@redhat.com) + * Wed Jun 14 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.109-1 - diff --git a/playbooks/adhoc/contiv/delete_contiv.yml b/playbooks/adhoc/contiv/delete_contiv.yml index 91948c72e..eec6c23a7 100644 --- a/playbooks/adhoc/contiv/delete_contiv.yml +++ b/playbooks/adhoc/contiv/delete_contiv.yml @@ -1,5 +1,5 @@ --- -- name: delete contiv +- name: Uninstall contiv hosts: all gather_facts: False tasks: diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 97d835eae..27c3a9edd 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -103,7 +103,7 @@ - atomic-openshift-sdn-ovs - cockpit-bridge - cockpit-docker - - cockpit-shell + - cockpit-system - cockpit-ws - kubernetes-client - openshift @@ -346,7 +346,7 @@ - atomic-openshift-master - cockpit-bridge - cockpit-docker - - cockpit-shell + - cockpit-system - cockpit-ws - corosync - kubernetes-client diff --git a/playbooks/byo/openshift-cfme/config.yml b/playbooks/byo/openshift-cfme/config.yml new file mode 100644 index 000000000..0e8e7a94d --- /dev/null +++ b/playbooks/byo/openshift-cfme/config.yml @@ -0,0 +1,8 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml + tags: + - always + +- include: ../../common/openshift-cluster/evaluate_groups.yml + +- include: ../../common/openshift-cfme/config.yml diff --git a/playbooks/byo/openshift-cfme/uninstall.yml b/playbooks/byo/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..c8ed16859 --- /dev/null +++ b/playbooks/byo/openshift-cfme/uninstall.yml @@ -0,0 +1,6 @@ +--- +# - include: ../openshift-cluster/initialize_groups.yml +# tags: +# - always + +- include: ../../common/openshift-cfme/uninstall.yml diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index fd4a9eb26..2372a5322 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -15,6 +15,11 @@ checks: - disk_availability - memory_availability + - package_availability + - package_update + - package_version + - docker_image_availability + - docker_storage - include: ../../common/openshift-cluster/std_include.yml tags: diff --git a/playbooks/byo/openshift-etcd/migrate.yml b/playbooks/byo/openshift-etcd/migrate.yml new file mode 100644 index 000000000..fd02e066e --- /dev/null +++ b/playbooks/byo/openshift-etcd/migrate.yml @@ -0,0 +1,124 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml + tags: + - always + +- include: ../../common/openshift-cluster/evaluate_groups.yml + tags: + - always + +- name: Run pre-checks + hosts: oo_etcd_to_config + tags: + - always + roles: + - role: etcd_migrate + r_etcd_migrate_action: check + etcd_peer: "{{ ansible_default_ipv4.address }}" + +# TODO(jchaloup): replace the std_include with something minimal so the entire playbook is faster +# e.g. I don't need to detect the OCP version, install deps, etc. +- include: ../../common/openshift-cluster/std_include.yml + tags: + - always + +- name: Backup v2 data + hosts: oo_etcd_to_config + gather_facts: no + tags: + - always + roles: + - role: openshift_facts + - role: etcd_common + r_etcd_common_action: backup + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" + r_etcd_common_backup_tag: pre-migration + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.oo_etcd_to_config) + | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.oo_etcd_to_config | difference(etcd_backup_completed) }}" + - fail: + msg: "Migration cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: + - etcd_backup_failed | length > 0 + +- name: Prepare masters for etcd data migration + hosts: oo_masters_to_config + tasks: + - set_fact: + master_services: + - "{{ openshift.common.service_type + '-master' }}" + - set_fact: + master_services: + - "{{ openshift.common.service_type + '-master-controllers' }}" + - "{{ openshift.common.service_type + '-master-api' }}" + when: + - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool + - debug: + msg: "master service name: {{ master_services }}" + - name: Stop masters + service: + name: "{{ item }}" + state: stopped + with_items: "{{ master_services }}" + +- name: Migrate etcd data from v2 to v3 + hosts: oo_etcd_to_config + gather_facts: no + tags: + - always + roles: + - role: etcd_migrate + r_etcd_migrate_action: migrate + etcd_peer: "{{ ansible_default_ipv4.address }}" + +- name: Gate on etcd migration + hosts: oo_masters_to_config + gather_facts: no + tasks: + - set_fact: + etcd_migration_completed: "{{ hostvars + | oo_select_keys(groups.oo_etcd_to_config) + | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}" + - set_fact: + etcd_migration_failed: "{{ groups.oo_etcd_to_config | difference(etcd_migration_completed) }}" + +- name: Configure masters if etcd data migration is succesfull + hosts: oo_masters_to_config + roles: + - role: etcd_migrate + r_etcd_migrate_action: configure + when: etcd_migration_failed | length == 0 + tasks: + - debug: + msg: "Skipping master re-configuration since migration failed." + when: + - etcd_migration_failed | length > 0 + +- name: Start masters after etcd data migration + hosts: oo_masters_to_config + tasks: + - name: Start master services + service: + name: "{{ item }}" + state: started + register: service_status + # Sometimes the master-api, resp. master-controllers fails to start for the first time + until: service_status.state is defined and service_status.state == "started" + retries: 5 + delay: 10 + with_items: "{{ master_services[::-1] }}" + - fail: + msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}" + when: + - etcd_migration_failed | length > 0 diff --git a/playbooks/common/openshift-cfme/config.yml b/playbooks/common/openshift-cfme/config.yml new file mode 100644 index 000000000..533a35d9e --- /dev/null +++ b/playbooks/common/openshift-cfme/config.yml @@ -0,0 +1,44 @@ +--- +# TODO: Make this work. The 'name' variable below is undefined +# presently because it's part of the cfme role. This play can't run +# until that's re-worked. +# +# - name: Pre-Pull manageiq-pods docker images +# hosts: nodes +# tasks: +# - name: Ensure the latest manageiq-pods docker image is pulling +# docker_image: +# name: "{{ openshift_cfme_container_image }}" +# # Fire-and-forget method, never timeout +# async: 99999999999 +# # F-a-f, never check on this. True 'background' task. +# poll: 0 + +- name: Configure Masters for CFME Bulk Image Imports + hosts: oo_masters_to_config + serial: 1 + tasks: + - name: Run master cfme tuning playbook + include_role: + name: openshift_cfme + tasks_from: tune_masters + +- name: Setup CFME + hosts: oo_first_master + vars: + r_openshift_cfme_miq_template_content: "{{ lookup('file', 'roles/openshift_cfme/files/miq-template.yaml') | from_yaml}}" + pre_tasks: + - name: Create a temporary place to evaluate the PV templates + command: mktemp -d /tmp/openshift-ansible-XXXXXXX + register: r_openshift_cfme_mktemp + changed_when: false + - name: Ensure the server template was read from disk + debug: + msg="{{ r_openshift_cfme_miq_template_content | from_yaml }}" + + tasks: + - name: Run the CFME Setup Role + include_role: + name: openshift_cfme + vars: + template_dir: "{{ hostvars[groups.masters.0].r_openshift_cfme_mktemp.stdout }}" diff --git a/playbooks/common/openshift-cfme/filter_plugins b/playbooks/common/openshift-cfme/filter_plugins new file mode 120000 index 000000000..99a95e4ca --- /dev/null +++ b/playbooks/common/openshift-cfme/filter_plugins @@ -0,0 +1 @@ +../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/library b/playbooks/common/openshift-cfme/library new file mode 120000 index 000000000..ba40d2f56 --- /dev/null +++ b/playbooks/common/openshift-cfme/library @@ -0,0 +1 @@ +../../../library
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/roles b/playbooks/common/openshift-cfme/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/common/openshift-cfme/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/uninstall.yml b/playbooks/common/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..78b8e7668 --- /dev/null +++ b/playbooks/common/openshift-cfme/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall CFME + hosts: masters + tasks: + - name: Run the CFME Uninstall Role Tasks + include_role: + name: openshift_cfme + tasks_from: uninstall diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index b980909eb..5c19df4c5 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -3,6 +3,16 @@ # Upgrade Masters ############################################################################### +# oc adm migrate storage should be run prior to etcd v3 upgrade +# See: https://github.com/openshift/origin/pull/14625#issuecomment-308467060 +- name: Pre master upgrade - Upgrade job storage + hosts: oo_first_master + tasks: + - name: Upgrade job storage + command: > + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig + migrate storage --include=jobs --confirm + # If facts cache were for some reason deleted, this fact may not be set, and if not set # it will always default to true. This causes problems for the etcd data dir fact detection # so we must first make sure this is set correctly before attempting the backup. @@ -133,6 +143,14 @@ - set_fact: master_update_complete: True +- name: Post master upgrade - Upgrade job storage + hosts: oo_first_master + tasks: + - name: Upgrade job storage + command: > + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig + migrate storage --include=jobs --confirm + ############################################################################## # Gate on master update complete ############################################################################## diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml deleted file mode 100644 index 48c69eccd..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/storage_upgrade.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -############################################################################### -# Post upgrade - Upgrade job storage -############################################################################### -- name: Upgrade job storage - hosts: oo_first_master - roles: - - { role: openshift_cli } - vars: - # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe - # restart. - skip_docker_role: True - tasks: - - name: Upgrade job storage - command: > - {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig - migrate storage --include=jobs --confirm - run_once: true diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml index e63b03e51..4e7c14e94 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade.yml @@ -115,5 +115,3 @@ - include: ../upgrade_nodes.yml - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml index 74c2964aa..45b664d06 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_5/upgrade_control_plane.yml @@ -119,5 +119,3 @@ master_config_hook: "v3_5/master_config_upgrade.yml" - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml deleted file mode 100644 index 48c69eccd..000000000 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/storage_upgrade.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -############################################################################### -# Post upgrade - Upgrade job storage -############################################################################### -- name: Upgrade job storage - hosts: oo_first_master - roles: - - { role: openshift_cli } - vars: - # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe - # restart. - skip_docker_role: True - tasks: - - name: Upgrade job storage - command: > - {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig - migrate storage --include=jobs --confirm - run_once: true diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml index 5d41b84d0..5b9ac9e8f 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml @@ -115,5 +115,3 @@ - include: ../upgrade_nodes.yml - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml index a66fb51ff..a470c7595 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml @@ -119,5 +119,3 @@ master_config_hook: "v3_6/master_config_upgrade.yml" - include: ../post_control_plane.yml - -- include: storage_upgrade.yml diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index ddc4db8f8..429460b2c 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -20,6 +20,15 @@ - node - .config_managed + - name: Check for existing configuration + stat: + path: /etc/origin/master/master-config.yaml + register: master_config_stat + + - name: Set clean install fact + set_fact: + l_clean_install: "{{ not master_config_stat.stat.exists }}" + - set_fact: openshift_master_pod_eviction_timeout: "{{ lookup('oo_option', 'openshift_master_pod_eviction_timeout') | default(none, true) }}" when: openshift_master_pod_eviction_timeout is not defined @@ -122,6 +131,7 @@ etcd_cert_subdir: "openshift-master-{{ openshift.common.hostname }}" etcd_cert_config_dir: "{{ openshift.common.config_base }}/master" etcd_cert_prefix: "master.etcd-" + r_openshift_master_clean_install: hostvars[groups.oo_first_master.0].l_clean_install - role: nuage_master when: openshift.common.use_nuage | bool - role: calico_master diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml index bddad778f..57f49ea11 100644 --- a/roles/cockpit/tasks/main.yml +++ b/roles/cockpit/tasks/main.yml @@ -3,7 +3,7 @@ package: name={{ item }} state=present with_items: - cockpit-ws - - cockpit-shell + - cockpit-system - cockpit-bridge - cockpit-docker - "{{ cockpit_plugins }}" diff --git a/roles/contiv/defaults/main.yml b/roles/contiv/defaults/main.yml index 1ccae61f2..8c4d19537 100644 --- a/roles/contiv/defaults/main.yml +++ b/roles/contiv/defaults/main.yml @@ -1,12 +1,12 @@ --- # The version of Contiv binaries to use -contiv_version: 1.0.0-beta.3-02-21-2017.20-52-42.UTC +contiv_version: 1.0.1 # The version of cni binaries cni_version: v0.4.0 -contiv_default_subnet: "20.1.1.1/24" -contiv_default_gw: "20.1.1.254" +contiv_default_subnet: "10.128.0.0/16" +contiv_default_gw: "10.128.254.254" # TCP port that Netmaster listens for network connections netmaster_port: 9999 @@ -69,6 +69,9 @@ netplugin_fwd_mode: bridge # Contiv fabric mode aci|default contiv_fabric_mode: default +# Global VLAN range +contiv_vlan_range: "2900-3000" + # Encapsulation type vlan|vxlan to use for instantiating container networks contiv_encap_mode: vlan @@ -78,8 +81,8 @@ netplugin_driver: ovs # Create a default Contiv network for use by pods contiv_default_network: true -# VLAN/ VXLAN tag value to be used for the default network -contiv_default_network_tag: 1 +# Statically configured tag for default network (if needed) +contiv_default_network_tag: "" #SRFIXME (use the openshift variables) https_proxy: "" @@ -95,6 +98,9 @@ apic_leaf_nodes: "" apic_phys_dom: "" apic_contracts_unrestricted_mode: no apic_epg_bridge_domain: not_specified +apic_configure_default_policy: false +apic_default_external_contract: "uni/tn-common/brc-default" +apic_default_app_profile: "contiv-infra-app-profile" is_atomic: False kube_cert_dir: "/data/src/github.com/openshift/origin/openshift.local.config/master" master_name: "{{ groups['masters'][0] }}" @@ -104,3 +110,12 @@ kube_ca_cert: "{{ kube_cert_dir }}/ca.crt" kube_key: "{{ kube_cert_dir }}/admin.key" kube_cert: "{{ kube_cert_dir }}/admin.crt" kube_master_api_port: 8443 + +# contivh1 default subnet and gateway +#contiv_h1_subnet_default: "132.1.1.0/24" +#contiv_h1_gw_default: "132.1.1.1" +contiv_h1_subnet_default: "10.129.0.0/16" +contiv_h1_gw_default: "10.129.0.1" + +# contiv default private subnet for ext access +contiv_private_ext_subnet: "10.130.0.0/16" diff --git a/roles/contiv/meta/main.yml b/roles/contiv/meta/main.yml index 3223afb6e..da6409f1e 100644 --- a/roles/contiv/meta/main.yml +++ b/roles/contiv/meta/main.yml @@ -26,3 +26,5 @@ dependencies: etcd_url_scheme: http etcd_peer_url_scheme: http when: contiv_role == "netmaster" +- role: contiv_auth_proxy + when: (contiv_role == "netmaster") and (contiv_enable_auth_proxy == true) diff --git a/roles/contiv/tasks/default_network.yml b/roles/contiv/tasks/default_network.yml index 9cf98bb80..f679443e0 100644 --- a/roles/contiv/tasks/default_network.yml +++ b/roles/contiv/tasks/default_network.yml @@ -6,10 +6,53 @@ retries: 9 delay: 10 +- name: Contiv | Set globals + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set --fabric-mode {{ contiv_fabric_mode }} --vlan-range {{ contiv_vlan_range }} --fwd-mode {{ netplugin_fwd_mode }} --private-subnet {{ contiv_private_ext_subnet }}' + +- name: Contiv | Set arp mode to flood if ACI + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" global set --arp-mode flood' + when: contiv_fabric_mode == "aci" + - name: Contiv | Check if default-net exists command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net ls' register: net_result - name: Contiv | Create default-net - command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway={{ contiv_default_gw }} default-net' + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_default_subnet }} -e {{ contiv_encap_mode }} -p {{ contiv_default_network_tag }} --gateway {{ contiv_default_gw }} default-net' when: net_result.stdout.find("default-net") == -1 + +- name: Contiv | Create host access infra network for VxLan routing case + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" net create --subnet={{ contiv_h1_subnet_default }} --gateway={{ contiv_h1_gw_default }} --nw-type="infra" contivh1' + when: (contiv_encap_mode == "vxlan") and (netplugin_fwd_mode == "routing") + +#- name: Contiv | Create an allow-all policy for the default-group +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy create ose-allow-all-policy' +# when: contiv_fabric_mode == "aci" + +- name: Contiv | Set up aci external contract to consume default external contract + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -c -a {{ apic_default_external_contract }} oseExtToConsume' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Set up aci external contract to provide default external contract + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" external-contracts create -p -a {{ apic_default_external_contract }} oseExtToProvide' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +- name: Contiv | Create aci default-group + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create default-net default-group' + when: contiv_fabric_mode == "aci" + +- name: Contiv | Add external contracts to the default-group + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" group create -e oseExtToConsume -e oseExtToProvide default-net default-group' + when: (contiv_fabric_mode == "aci") and (apic_configure_default_policy == true) + +#- name: Contiv | Add policy rule 1 for allow-all policy +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d in --action allow ose-allow-all-policy 1' +# when: contiv_fabric_mode == "aci" + +#- name: Contiv | Add policy rule 2 for allow-all policy +# command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" policy rule-add -d out --action allow ose-allow-all-policy 2' +# when: contiv_fabric_mode == "aci" + +- name: Contiv | Create default aci app profile + command: 'netctl --netmaster "http://{{ inventory_hostname }}:{{ netmaster_port }}" app-profile create -g default-group {{ apic_default_app_profile }}' + when: contiv_fabric_mode == "aci" diff --git a/roles/contiv/tasks/netmaster.yml b/roles/contiv/tasks/netmaster.yml index 5057767b8..acaf7386e 100644 --- a/roles/contiv/tasks/netmaster.yml +++ b/roles/contiv/tasks/netmaster.yml @@ -23,7 +23,7 @@ line: "{{ hostvars[item]['ansible_' + netmaster_interface].ipv4.address }} netmaster" state: present when: hostvars[item]['ansible_' + netmaster_interface].ipv4.address is defined - with_items: groups['masters'] + with_items: "{{ groups['masters'] }}" - name: Netmaster | Create netmaster symlinks file: diff --git a/roles/contiv/tasks/netplugin_iptables.yml b/roles/contiv/tasks/netplugin_iptables.yml index 8c348ac67..184c595c5 100644 --- a/roles/contiv/tasks/netplugin_iptables.yml +++ b/roles/contiv/tasks/netplugin_iptables.yml @@ -23,7 +23,36 @@ notify: Save iptables rules - name: Netplugin IPtables | Open vxlan port with iptables - command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "vxlan" + command: /sbin/iptables -I INPUT 1 -p udp --dport 8472 -j ACCEPT -m comment --comment "netplugin vxlan 8472" + when: iptablesrules.stdout.find("netplugin vxlan 8472") == -1 + notify: Save iptables rules - name: Netplugin IPtables | Open vxlan port with iptables - command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment "vxlan" + command: /sbin/iptables -I INPUT 1 -p udp --dport 4789 -j ACCEPT -m comment --comment "netplugin vxlan 4789" + when: iptablesrules.stdout.find("netplugin vxlan 4789") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh0 + command: /sbin/iptables -I FORWARD 1 -i contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD input" + when: iptablesrules.stdout.find("contivh0 FORWARD input") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh0 + command: /sbin/iptables -I FORWARD 1 -o contivh0 -j ACCEPT -m comment --comment "contivh0 FORWARD output" + when: iptablesrules.stdout.find("contivh0 FORWARD output") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow from contivh1 + command: /sbin/iptables -I FORWARD 1 -i contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD input" + when: iptablesrules.stdout.find("contivh1 FORWARD input") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow to contivh1 + command: /sbin/iptables -I FORWARD 1 -o contivh1 -j ACCEPT -m comment --comment "contivh1 FORWARD output" + when: iptablesrules.stdout.find("contivh1 FORWARD output") == -1 + notify: Save iptables rules + +- name: Netplugin IPtables | Allow dns + command: /sbin/iptables -I INPUT 1 -p udp --dport 53 -j ACCEPT -m comment --comment "contiv dns" + when: iptablesrules.stdout.find("contiv dns") == -1 + notify: Save iptables rules diff --git a/roles/contiv/tasks/packageManagerInstall.yml b/roles/contiv/tasks/packageManagerInstall.yml index 2eff1b85f..e0d48e643 100644 --- a/roles/contiv/tasks/packageManagerInstall.yml +++ b/roles/contiv/tasks/packageManagerInstall.yml @@ -4,9 +4,10 @@ did_install: false - include: pkgMgrInstallers/centos-install.yml - when: ansible_distribution == "CentOS" and not is_atomic + when: (ansible_os_family == "RedHat") and + not is_atomic - name: Package Manager | Set fact saying we did CentOS package install set_fact: did_install: true - when: ansible_distribution == "CentOS" + when: (ansible_os_family == "RedHat") diff --git a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml index 51c3d35ac..91e6aadf3 100644 --- a/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml +++ b/roles/contiv/tasks/pkgMgrInstallers/centos-install.yml @@ -1,13 +1,13 @@ --- -- name: PkgMgr CentOS | Install net-tools pkg for route +- name: PkgMgr RHEL/CentOS | Install net-tools pkg for route yum: pkg=net-tools state=latest -- name: PkgMgr CentOS | Get openstack kilo rpm +- name: PkgMgr RHEL/CentOS | Get openstack ocata rpm get_url: - url: https://repos.fedorapeople.org/repos/openstack/openstack-kilo/rdo-release-kilo-2.noarch.rpm - dest: /tmp/rdo-release-kilo-2.noarch.rpm + url: https://repos.fedorapeople.org/repos/openstack/openstack-ocata/rdo-release-ocata-2.noarch.rpm + dest: /tmp/rdo-release-ocata-2.noarch.rpm validate_certs: False environment: http_proxy: "{{ http_proxy|default('') }}" @@ -16,15 +16,15 @@ tags: - ovs_install -- name: PkgMgr CentOS | Install openstack kilo rpm - yum: name=/tmp/rdo-release-kilo-2.noarch.rpm state=present +- name: PkgMgr RHEL/CentOS | Install openstack ocata rpm + yum: name=/tmp/rdo-release-ocata-2.noarch.rpm state=present tags: - ovs_install -- name: PkgMgr CentOS | Install ovs +- name: PkgMgr RHEL/CentOS | Install ovs yum: - pkg=openvswitch - state=latest + pkg=openvswitch-2.5.0-2.el7.x86_64 + state=present environment: http_proxy: "{{ http_proxy|default('') }}" https_proxy: "{{ https_proxy|default('') }}" diff --git a/roles/contiv/templates/netplugin.j2 b/roles/contiv/templates/netplugin.j2 index f3d26c037..a4928cc3d 100644 --- a/roles/contiv/templates/netplugin.j2 +++ b/roles/contiv/templates/netplugin.j2 @@ -1,9 +1,7 @@ {% if contiv_encap_mode == "vlan" %} NETPLUGIN_ARGS='-vlan-if {{ netplugin_interface }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' {% endif %} -{# Note: Commenting out vxlan encap mode support until it is fully supported {% if contiv_encap_mode == "vxlan" %} -NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -e {{contiv_encap_mode}} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' +NETPLUGIN_ARGS='-vtep-ip {{ netplugin_ctrl_ip }} -ctrl-ip {{ netplugin_ctrl_ip }} -plugin-mode kubernetes -cluster-store etcd://{{ etcd_url }}' {% endif %} -#} diff --git a/roles/contiv_auth_proxy/README.md b/roles/contiv_auth_proxy/README.md new file mode 100644 index 000000000..287b6c148 --- /dev/null +++ b/roles/contiv_auth_proxy/README.md @@ -0,0 +1,29 @@ +Role Name +========= + +Role to install Contiv API Proxy and UI + +Requirements +------------ + +Docker needs to be installed to run the auth proxy container. + +Role Variables +-------------- + +auth_proxy_image specifies the image with version tag to be used to spin up the auth proxy container. +auth_proxy_cert, auth_proxy_key specify files to use for the proxy server certificates. +auth_proxy_port is the host port and auth_proxy_datastore the cluster data store address. + +Dependencies +------------ + +docker + +Example Playbook +---------------- + +- hosts: netplugin-node + become: true + roles: + - { role: auth_proxy, auth_proxy_port: 10000, auth_proxy_datastore: etcd://netmaster:22379 } diff --git a/roles/contiv_auth_proxy/defaults/main.yml b/roles/contiv_auth_proxy/defaults/main.yml new file mode 100644 index 000000000..4e637a947 --- /dev/null +++ b/roles/contiv_auth_proxy/defaults/main.yml @@ -0,0 +1,11 @@ +--- +auth_proxy_image: "contiv/auth_proxy:1.0.0-beta.2" +auth_proxy_port: 10000 +contiv_certs: "/var/contiv/certs" +cluster_store: "{{ hostvars[groups['masters'][0]]['ansible_' + netmaster_interface].ipv4.address }}:22379" +auth_proxy_cert: "{{ contiv_certs }}/auth_proxy_cert.pem" +auth_proxy_key: "{{ contiv_certs }}/auth_proxy_key.pem" +auth_proxy_datastore: "{{ cluster_store }}" +auth_proxy_binaries: "/var/contiv_cache" +auth_proxy_local_install: False +auth_proxy_rule_comment: "Contiv auth proxy service" diff --git a/roles/contiv_auth_proxy/files/auth-proxy.service b/roles/contiv_auth_proxy/files/auth-proxy.service new file mode 100644 index 000000000..7cd2edff1 --- /dev/null +++ b/roles/contiv_auth_proxy/files/auth-proxy.service @@ -0,0 +1,13 @@ +[Unit] +Description=Contiv Proxy and UI +After=auditd.service systemd-user-sessions.service time-sync.target docker.service + +[Service] +ExecStart=/usr/bin/auth_proxy.sh start +ExecStop=/usr/bin/auth_proxy.sh stop +KillMode=control-group +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=multi-user.target diff --git a/roles/contiv_auth_proxy/handlers/main.yml b/roles/contiv_auth_proxy/handlers/main.yml new file mode 100644 index 000000000..9cb9bea49 --- /dev/null +++ b/roles/contiv_auth_proxy/handlers/main.yml @@ -0,0 +1,2 @@ +--- +# handlers file for auth_proxy diff --git a/roles/contiv_auth_proxy/tasks/cleanup.yml b/roles/contiv_auth_proxy/tasks/cleanup.yml new file mode 100644 index 000000000..a29659cc9 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/cleanup.yml @@ -0,0 +1,10 @@ +--- + +- name: stop auth-proxy container + service: name=auth-proxy state=stopped + +- name: cleanup iptables for auth proxy + shell: iptables -D INPUT -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" + become: true + with_items: + - "{{ auth_proxy_port }}" diff --git a/roles/contiv_auth_proxy/tasks/main.yml b/roles/contiv_auth_proxy/tasks/main.yml new file mode 100644 index 000000000..74e7bf794 --- /dev/null +++ b/roles/contiv_auth_proxy/tasks/main.yml @@ -0,0 +1,37 @@ +--- +# tasks file for auth_proxy +- name: setup iptables for auth proxy + shell: > + ( iptables -L INPUT | grep "{{ auth_proxy_rule_comment }} ({{ item }})" ) || \ + iptables -I INPUT 1 -p tcp --dport {{ item }} -j ACCEPT -m comment --comment "{{ auth_proxy_rule_comment }} ({{ item }})" + become: true + with_items: + - "{{ auth_proxy_port }}" + +# Load the auth-proxy-image from local tar. Ignore any errors to handle the +# case where the image is not built in +- name: copy auth-proxy image + copy: src={{ auth_proxy_binaries }}/auth-proxy-image.tar dest=/tmp/auth-proxy-image.tar + when: auth_proxy_local_install == True + +- name: load auth-proxy image + shell: docker load -i /tmp/auth-proxy-image.tar + when: auth_proxy_local_install == True + +- name: create cert folder for proxy + file: path=/var/contiv/certs state=directory + +- name: copy shell script for starting auth-proxy + template: src=auth_proxy.j2 dest=/usr/bin/auth_proxy.sh mode=u=rwx,g=rx,o=rx + +- name: copy cert for starting auth-proxy + copy: src=cert.pem dest=/var/contiv/certs/auth_proxy_cert.pem mode=u=rw,g=r,o=r + +- name: copy key for starting auth-proxy + copy: src=key.pem dest=/var/contiv/certs/auth_proxy_key.pem mode=u=rw,g=r,o=r + +- name: copy systemd units for auth-proxy + copy: src=auth-proxy.service dest=/etc/systemd/system/auth-proxy.service + +- name: start auth-proxy container + systemd: name=auth-proxy daemon_reload=yes state=started enabled=yes diff --git a/roles/contiv_auth_proxy/templates/auth_proxy.j2 b/roles/contiv_auth_proxy/templates/auth_proxy.j2 new file mode 100644 index 000000000..e82e5b4ab --- /dev/null +++ b/roles/contiv_auth_proxy/templates/auth_proxy.j2 @@ -0,0 +1,36 @@ +#!/bin/bash + +usage="$0 start/stop" +if [ $# -ne 1 ]; then + echo USAGE: $usage + exit 1 +fi + +case $1 in +start) + set -e + + /usr/bin/docker run --rm \ + -p 10000:{{ auth_proxy_port }} \ + --net=host --name=auth-proxy \ + -e NO_NETMASTER_STARTUP_CHECK=1 \ + -v /var/contiv:/var/contiv \ + {{ auth_proxy_image }} \ + --tls-key-file={{ auth_proxy_key }} \ + --tls-certificate={{ auth_proxy_cert }} \ + --data-store-address={{ auth_proxy_datastore }} \ + --netmaster-address={{ service_vip }}:9999 \ + --listen-address=:10000 + ;; + +stop) + # don't stop on error + /usr/bin/docker stop auth-proxy + /usr/bin/docker rm -f -v auth-proxy + ;; + +*) + echo USAGE: $usage + exit 1 + ;; +esac diff --git a/roles/contiv_auth_proxy/tests/inventory b/roles/contiv_auth_proxy/tests/inventory new file mode 100644 index 000000000..d18580b3c --- /dev/null +++ b/roles/contiv_auth_proxy/tests/inventory @@ -0,0 +1 @@ +localhost
\ No newline at end of file diff --git a/roles/contiv_auth_proxy/tests/test.yml b/roles/contiv_auth_proxy/tests/test.yml new file mode 100644 index 000000000..2af3250cd --- /dev/null +++ b/roles/contiv_auth_proxy/tests/test.yml @@ -0,0 +1,5 @@ +--- +- hosts: localhost + remote_user: root + roles: + - auth_proxy diff --git a/roles/contiv_auth_proxy/vars/main.yml b/roles/contiv_auth_proxy/vars/main.yml new file mode 100644 index 000000000..9032766c4 --- /dev/null +++ b/roles/contiv_auth_proxy/vars/main.yml @@ -0,0 +1,2 @@ +--- +# vars file for auth_proxy diff --git a/roles/contiv_facts/defaults/main.yaml b/roles/contiv_facts/defaults/main.yaml index a6c08fa63..7b8150954 100644 --- a/roles/contiv_facts/defaults/main.yaml +++ b/roles/contiv_facts/defaults/main.yaml @@ -8,3 +8,6 @@ bin_dir: /usr/bin ansible_temp_dir: /tmp/.ansible/files source_type: packageManager + +# Whether or not to also install and enable the Contiv auth_proxy +contiv_enable_auth_proxy: false diff --git a/roles/etcd_migrate/README.md b/roles/etcd_migrate/README.md new file mode 100644 index 000000000..369e78ff2 --- /dev/null +++ b/roles/etcd_migrate/README.md @@ -0,0 +1,53 @@ +Role Name +========= + +Offline etcd migration of data from v2 to v3 + +Requirements +------------ + +It is expected all consumers of the etcd data are not accessing the data. +Otherwise the migrated data can be out-of-sync with the v2 and can result in unhealthy etcd cluster. + +The role itself is responsible for: +- checking etcd cluster health and raft status before the migration +- checking of presence of any v3 data (in that case the migration is stopped) +- migration of v2 data to v3 data (including attaching leases of keys prefixed with "/kubernetes.io/events" and "/kubernetes.io/masterleases" string) +- validation of migrated data (all v2 keys and in v3 keys and are set to the identical value) + +The migration itself requires an etcd member to be down in the process. Once the migration is done, the etcd member is started. + +Role Variables +-------------- + +TBD + +Dependencies +------------ + +- etcd_common +- lib_utils + +Example Playbook +---------------- + +```yaml +- name: Migrate etcd data from v2 to v3 + hosts: oo_etcd_to_config + gather_facts: no + tasks: + - include_role: + name: openshift_etcd_migrate + vars: + etcd_peer: "{{ ansible_default_ipv4.address }}" +``` + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +Jan Chaloupka (jchaloup@redhat.com) diff --git a/roles/etcd_migrate/defaults/main.yml b/roles/etcd_migrate/defaults/main.yml new file mode 100644 index 000000000..05cf41fbb --- /dev/null +++ b/roles/etcd_migrate/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# Default action when calling this role, choices: check, migrate, configure +r_etcd_migrate_action: migrate diff --git a/roles/etcd_migrate/meta/main.yml b/roles/etcd_migrate/meta/main.yml new file mode 100644 index 000000000..f3cabbef6 --- /dev/null +++ b/roles/etcd_migrate/meta/main.yml @@ -0,0 +1,17 @@ +--- +galaxy_info: + author: Jan Chaloupka + description: Etcd migration + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.1 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- { role: etcd_common } +- { role: lib_utils } diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml new file mode 100644 index 000000000..2f07713bc --- /dev/null +++ b/roles/etcd_migrate/tasks/check.yml @@ -0,0 +1,55 @@ +--- +# Check the cluster is healthy +- include: check_cluster_health.yml + +# Check if the member has v3 data already +# Run the migration only if the data are v2 +- name: Check if there are any v3 data + command: > + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:2379' get "" --from-key --keys-only -w json --limit 1 + environment: + ETCDCTL_API: 3 + register: l_etcdctl_output + +- fail: + msg: "Unable to get a number of v3 keys" + when: l_etcdctl_output.rc != 0 + +- fail: + msg: "The etcd has at least one v3 key" + when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" + + +# TODO(jchaloup): once the until loop can be used over include/block, +# remove the repetive code +# - until loop not supported over include statement (nor block) +# https://github.com/ansible/ansible/issues/17098 +# - with_items not supported over block + +# Check the cluster status for the first time +- include: check_cluster_status.yml + +# Check the cluster status for the second time +- block: + - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool + + +# Check the cluster status for the third time +- block: + - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd_migrate/tasks/check_cluster_health.yml b/roles/etcd_migrate/tasks/check_cluster_health.yml new file mode 100644 index 000000000..1abd6a32f --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_health.yml @@ -0,0 +1,23 @@ +--- +- name: Check cluster health + command: > + etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://{{ etcd_peer }}:2379 cluster-health + register: etcd_cluster_health + changed_when: false + failed_when: false + +- name: Assume a member is not healthy + set_fact: + etcd_member_healthy: false + +- name: Get member item health status + set_fact: + etcd_member_healthy: true + with_items: "{{ etcd_cluster_health.stdout_lines }}" + when: "(etcd_peer in item) and ('is healthy' in item)" + +- name: Check the etcd cluster health + # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? + fail: + msg: "Etcd member {{ etcd_peer }} is not healthy" + when: not etcd_member_healthy diff --git a/roles/etcd_migrate/tasks/check_cluster_status.yml b/roles/etcd_migrate/tasks/check_cluster_status.yml new file mode 100644 index 000000000..90fe385c1 --- /dev/null +++ b/roles/etcd_migrate/tasks/check_cluster_status.yml @@ -0,0 +1,32 @@ +--- +# etcd_ip originates from etcd_common role +- name: Check cluster status + command: > + etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints 'https://{{ etcd_peer }}:2379' -w json endpoint status + environment: + ETCDCTL_API: 3 + register: l_etcd_cluster_status + +- name: Retrieve raftIndex + set_fact: + etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" + +- block: + # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers + - name: Group all raftIndices into a list + set_fact: + etcd_members_raft_indices: "{{ groups['oo_etcd_to_config'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + + - name: Check the minimum and the maximum of raftIndices is at most 1 + set_fact: + etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" + + - debug: + msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" + + when: inventory_hostname in groups.oo_etcd_to_config[0] + +# The cluster raft status is ok if the difference of the max and min raft index is at most 1 +- name: capture the status + set_fact: + l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_config[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd_migrate/tasks/configure.yml b/roles/etcd_migrate/tasks/configure.yml new file mode 100644 index 000000000..a305d5bf3 --- /dev/null +++ b/roles/etcd_migrate/tasks/configure.yml @@ -0,0 +1,13 @@ +--- +- name: Configure master to use etcd3 storage backend + yedit: + src: /etc/origin/master/master-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: kubernetesMasterConfig.apiServerArguments.storage-backend + value: + - etcd3 + - key: kubernetesMasterConfig.apiServerArguments.storage-media-type + value: + - application/vnd.kubernetes.protobuf diff --git a/roles/etcd_migrate/tasks/main.yml b/roles/etcd_migrate/tasks/main.yml new file mode 100644 index 000000000..409b0b613 --- /dev/null +++ b/roles/etcd_migrate/tasks/main.yml @@ -0,0 +1,25 @@ +--- +- name: Fail if invalid r_etcd_migrate_action provided + fail: + msg: "etcd_migrate role can only be called with 'check' or 'migrate' or 'configure'" + when: r_etcd_migrate_action not in ['check', 'migrate', 'configure'] + +- name: Include main action task file + include: "{{ r_etcd_migrate_action }}.yml" + +# 2. migrate v2 datadir into v3: +# ETCDCTL_API=3 ./etcdctl migrate --data-dir=${data_dir} --no-ttl +# backup the etcd datadir first +# Provide a way for an operator to specify transformer + +# 3. re-configure OpenShift master at /etc/origin/master/master-config.yml +# set storage-backend to “etcd3” +# 4. we could leave the master restart to current logic (there is already the code ready (single vs. HA master)) + +# Run +# etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://172.16.186.45:2379 cluster-health +# to check the cluster health (from the etcdctl.sh aliases file) + +# Another assumption: +# - in order to migrate all etcd v2 data into v3, we need to shut down the cluster (let's verify that on Wednesday meeting) +# - diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml new file mode 100644 index 000000000..cb479b0cc --- /dev/null +++ b/roles/etcd_migrate/tasks/migrate.yml @@ -0,0 +1,53 @@ +--- +# Should this be run in a serial manner? +- set_fact: + l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + +- name: Disable etcd members + service: + name: "{{ l_etcd_service }}" + state: stopped + +# Should we skip all TTL keys? https://bugzilla.redhat.com/show_bug.cgi?id=1389773 +- name: Migrate etcd data + command: > + etcdctl migrate --data-dir={{ etcd_data_dir }} + environment: + ETCDCTL_API: 3 + register: l_etcdctl_migrate + +# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup +- name: Check the etcd v2 data are correctly migrated + fail: + msg: "Failed to migrate a member" + when: "'finished transforming keys' not in l_etcdctl_migrate.stdout" + +# TODO(jchaloup): start the etcd on a different port so noone can access it +# Once the validation is done +- name: Enable etcd member + service: + name: "{{ l_etcd_service }}" + state: started + +- name: Re-introduce leases (as a replacement for key TTLs) + command: > + oadm migrate etcd-ttl \ + --cert {{ etcd_peer_cert_file }} \ + --key {{ etcd_peer_key_file }} \ + --cacert {{ etcd_peer_ca_file }} \ + --etcd-address 'https://{{ etcd_peer }}:2379' \ + --ttl-keys-prefix {{ item }} \ + --lease-duration 1h + environment: + ETCDCTL_API: 3 + with_items: + - "/kubernetes.io/events" + - "/kubernetes.io/masterleases" + +- set_fact: + r_etcd_migrate_success: true + +- name: Enable etcd member + service: + name: "{{ l_etcd_service }}" + state: started diff --git a/roles/lib_openshift/library/oc_obj.py b/roles/lib_openshift/library/oc_obj.py index 56af303cc..9b0c0e0e4 100644 --- a/roles/lib_openshift/library/oc_obj.py +++ b/roles/lib_openshift/library/oc_obj.py @@ -90,9 +90,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. required: false default: false aliases: [] diff --git a/roles/lib_openshift/src/doc/obj b/roles/lib_openshift/src/doc/obj index 4ff912b2d..c6504ed01 100644 --- a/roles/lib_openshift/src/doc/obj +++ b/roles/lib_openshift/src/doc/obj @@ -39,9 +39,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. required: false default: false aliases: [] diff --git a/roles/openshift_cfme/README.md b/roles/openshift_cfme/README.md new file mode 100644 index 000000000..8283afed6 --- /dev/null +++ b/roles/openshift_cfme/README.md @@ -0,0 +1,404 @@ +# OpenShift-Ansible - CFME Role + +# PROOF OF CONCEPT - Alpha Version + +This role is based on the work in the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. For additional literature on configuration specific to +ManageIQ (optional post-installation tasks), visit the project's +[upstream documentation page](http://manageiq.org/docs/get-started/basic-configuration). + +Please submit a +[new issue](https://github.com/openshift/openshift-ansible/issues/new) +if you run into bugs with this role or wish to request enhancements. + +# Important Notes + +This is an early *proof of concept* role to install the Cloud Forms +Management Engine (ManageIQ) on OpenShift Container Platform (OCP). + +* This role is still in **ALPHA STATUS** +* Many options are hard-coded still (ex: NFS setup) +* Not many configurable options yet +* **Should** be ran on a dedicated cluster +* **Will not run** on undersized infra +* The terms *CFME* and *MIQ* / *ManageIQ* are interchangeable + +## Requirements + +**NOTE:** These requirements are copied from the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. + +### Prerequisites: + +* + [OpenShift Origin 1.5](https://docs.openshift.com/container-platform/3.5/welcome/index.html) + or + [higher](https://docs.openshift.com/container-platform/latest/welcome/index.html) + provisioned +* NFS or other compatible volume provider +* A cluster-admin user (created by role if required) + +### Cluster Sizing + +In order to avoid random deployment failures due to resource +starvation, we recommend a minimum cluster size for a **test** +environment. + +| Type | Size | CPUs | Memory | +|----------------|---------|----------|----------| +| Masters | `1+` | `8` | `12GB` | +| Nodes | `2+` | `4` | `8GB` | +| PV Storage | `25GB` | `N/A` | `N/A` | + + +![Basic CFME Deployment](img/CFMEBasicDeployment.png) + +**CFME has hard-requirements for memory. CFME will NOT install if your + infrastructure does not meet or exceed the requirements given + above. Do not run this playbook if you do not have the required + memory, you will just waste your time.** + + +### Other sizing considerations + +* Recommendations assume MIQ will be the **only application running** + on this cluster. +* Alternatively, you can provision an infrastructure node to run + registry/metrics/router/logging pods. +* Each MIQ application pod will consume at least `3GB` of RAM on initial + deployment (blank deployment without providers). +* RAM consumption will ramp up higher depending on appliance use, once + providers are added expect higher resource consumption. + + +### Assumptions + +1) You meet/exceed the [cluster sizing](#cluster-sizing) requirements +1) Your NFS server is on your master host +1) Your PV backing NFS storage volume is mounted on `/exports/` + +Required directories that NFS will export to back the PVs: + +* `/exports/miq-pv0[123]` + +If the required directories are not present at install-time, they will +be created using the recommended permissions per the +[upstream documentation](https://github.com/ManageIQ/manageiq-pods#make-persistent-volumes-to-host-the-miq-database-and-application-data): + +* UID/GID: `root`/`root` +* Mode: `0775` + +**IMPORTANT:** If you are using a separate volume (`/dev/vdX`) for NFS + storage, **ensure** it is mounted on `/exports/` **before** running + this role. + + + +## Role Variables + +Core variables in this role: + +| Name | Default value | Description | +|-------------------------------|---------------|---------------| +| `openshift_cfme_install_app` | `False` | `True`: Install everything and create a new CFME app, `False`: Just install all of the templates and scaffolding | + + +Variables you may override have defaults defined in +[defaults/main.yml](defaults/main.yml). + + +# Important Notes + +This is a **tech preview** status role presently. Use it with the same +caution you would give any other pre-release software. + +**Most importantly** follow this one rule: don't re-run the entrypoint +playbook multiple times in a row without cleaning up after previous +runs if some of the CFME steps have ran. This is a known +flake. Cleanup instructions are provided at the bottom of this README. + + +# Usage + +This section describes the basic usage of this role. All parameters +will use their [default values](defaults/main.yml). + +## Pre-flight Checks + +**IMPORTANT:** As documented above in [the prerequisites](#prerequisites), + you **must already** have your OCP cluster up and running. + +**Optional:** The ManageIQ pod is fairly large (about 1.7 GB) so to +save some spin-up time post-deployment, you can begin pre-pulling the +docker image to each of your nodes now: + +``` +root@node0x # docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +## Getting Started + +1) The *entry point playbook* to install CFME is located in +[the BYO playbooks](../../playbooks/byo/openshift-cfme/config.yml) +directory + +2) Update your existing `hosts` inventory file and ensure the +parameter `openshift_cfme_install_app` is set to `True` under the +`[OSEv3:vars]` block. + +2) Using your existing `hosts` inventory file, run `ansible-playbook` +with the entry point playbook: + +``` +$ ansible-playbook -v -i <INVENTORY_FILE> playbooks/byo/openshift-cfme/config.yml +``` + +## Next Steps + +Once complete, the playbook will let you know: + + +``` +TASK [openshift_cfme : Status update] ********************************************************* +ok: [ho.st.na.me] => { + "msg": "CFME has been deployed. Note that there will be a delay before it is fully initialized.\n" +} +``` + +This will take several minutes (*possibly 10 or more*, depending on +your network connection). However, you can get some insight into the +deployment process during initialization. + +### oc describe pod manageiq-0 + +*Some useful information about the output you will see if you run the +`oc describe pod manageiq-0` command* + +**Readiness probe**s - These will take a while to become +`Healthy`. The initial health probes won't even happen for at least 8 +minutes depending on how long it takes you to pull down the large +images. ManageIQ is a large application so it may take a considerable +amount of time for it to deploy and be marked as `Healthy`. + +If you go to the node you know the application is running on (check +for `Successfully assigned manageiq-0 to <HOST|IP>` in the `describe` +output) you can run a `docker pull` command to monitor the progress of +the image pull: + +``` +[root@cfme-node ~]# docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +Trying to pull repository docker.io/manageiq/manageiq-pods ... +sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a: Pulling from docker.io/manageiq/manageiq-pods +Digest: sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a +Status: Image is up to date for docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +The example above demonstrates the case where the image has been +successfully pulled already. + +If the image isn't completely pulled already then you will see +multiple progress bars detailing each image layer download status. + + +### rsh + +*Useful inspection/progress monitoring techniques with the `oc rsh` +command.* + + +On your master node, switch to the `cfme` project (or whatever you +named it if you overrode the `openshift_cfme_project` variable) and +check on the pod states: + +``` +[root@cfme-master01 ~]# oc project cfme +Now using project "cfme" on server "https://10.10.0.100:8443". + +[root@cfme-master01 ~]# oc get pod +NAME READY STATUS RESTARTS AGE +manageiq-0 0/1 Running 0 14m +memcached-1-3lk7g 1/1 Running 0 14m +postgresql-1-12slb 1/1 Running 0 14m +``` + +Note how the `manageiq-0` pod says `0/1` under the **READY** +column. After some time (depending on your network connection) you'll +be able to `rsh` into the pod to find out more of what's happening in +real time. First, the easy-mode command, run this once `rsh` is +available and then watch until it says `Started Initialize Appliance +Database`: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 journalctl -f -u appliance-initialize.service +``` + +For the full explanation of what this means, and more interactive +inspection techniques, keep reading on. + +To obtain a shell on our `manageiq` pod we use this command: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 bash -l +``` + +The `rsh` command opens a shell in your pod for you. In this case it's +the pod called `manageiq-0`. `systemd` is managing the services in +this pod so we can use the `list-units` command to see what is running +currently: `# systemctl list-units | grep appliance`. + +If you see the `appliance-initialize` service running, this indicates +that basic setup is still in progress. We can monitor the process with +the `journalctl` command like so: + + +``` +[root@manageiq-0 vmdb]# journalctl -f -u appliance-initialize.service +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking deployment status == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: No pre-existing EVM configuration found on region PV +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking for existing data on server PV == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Starting New Deployment == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Applying memcached config == +Jun 14 14:55:53 manageiq-0 appliance-initialize.sh[58]: == Initializing Appliance == +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: create encryption key +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: configuring external database +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: Checking for connections to the database... +Jun 14 14:56:09 manageiq-0 appliance-initialize.sh[58]: Create region starting +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: Create region complete +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data backup == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sending incremental file list +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: created directory /persistent/server-deploy/backup/backup_2017_06_14_145816 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/REGION +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/v2_key +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/database.yml +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/GUID +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sent 1330 bytes received 136 bytes 2932.00 bytes/sec +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: total size is 770 speedup is 0.53 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Restoring PV data symlinks == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/REGION symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/config/database.yml symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/certs/v2_key symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/log symlink is already in place, skipping +Jun 14 14:58:28 manageiq-0 systemctl[304]: Removed symlink /etc/systemd/system/multi-user.target.wants/appliance-initialize.service. +Jun 14 14:58:29 manageiq-0 systemd[1]: Started Initialize Appliance Database. +``` + +Most of what we see here (above) is the initial database seeding +process. This process isn't very quick, so be patient. + +At the bottom of the log there is a special line from the `systemctl` +service, `Removed symlink +/etc/systemd/system/multi-user.target.wants/appliance-initialize.service`. The +`appliance-initialize` service is no longer marked as enabled. This +indicates that the base application initialization is complete now. + +We're not done yet though, there are other ancillary services which +run in this pod to support the application. *Still in the rsh shell*, +Use the `ps` command to monitor for the `httpd` processes +starting. You will see output similar to the following when that stage +has completed: + +``` +[root@manageiq-0 vmdb]# ps aux | grep http +root 1941 0.0 0.1 249820 7640 ? Ss 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1942 0.0 0.0 250752 6012 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1943 0.0 0.0 250472 5952 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1944 0.0 0.0 250472 5916 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1945 0.0 0.0 250360 5764 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +``` + +Furthermore, you can find other related processes by just looking for +ones with `MIQ` in their name: + +``` +[root@manageiq-0 vmdb]# ps aux | grep miq +root 333 27.7 4.2 555884 315916 ? Sl 14:58 3:59 MIQ Server +root 1976 0.6 4.0 507224 303740 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 1, queue: generic +root 1984 0.6 4.0 507224 304312 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 2, queue: generic +root 1992 0.9 4.0 508252 304888 ? SNl 15:02 0:05 MIQ: MiqPriorityWorker id: 3, queue: generic +root 2000 0.7 4.0 510308 304696 ? SNl 15:02 0:04 MIQ: MiqPriorityWorker id: 4, queue: generic +root 2008 1.2 4.0 514000 303612 ? SNl 15:02 0:07 MIQ: MiqScheduleWorker id: 5 +root 2026 0.2 4.0 517504 303644 ? SNl 15:02 0:01 MIQ: MiqEventHandler id: 6, queue: ems +root 2036 0.2 4.0 518532 303768 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 7, queue: reporting +root 2044 0.2 4.0 519560 303812 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 8, queue: reporting +root 2059 0.2 4.0 528372 303956 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:5000) [MIQ: Web Server Worker] +root 2067 0.9 4.0 529664 305716 ? SNl 15:02 0:05 puma 3.3.0 (tcp://127.0.0.1:3000) [MIQ: Web Server Worker] +root 2075 0.2 4.0 529408 304056 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:4000) [MIQ: Web Server Worker] +root 2329 0.0 0.0 10640 972 ? S+ 15:13 0:00 grep --color=auto -i miq +``` + +Finally, *still in the rsh shell*, to test if the application is +running correctly, we can request the application homepage. If the +page is available the page title will be `ManageIQ: Login`: + +``` +[root@manageiq-0 vmdb]# curl -s -k https://localhost | grep -A2 '<title>' +<title> +ManageIQ: Login +</title> +``` + +**Note:** The `-s` flag makes `curl` operations silent and the `-k` +flag to ignore errors about untrusted certificates. + + + +# Additional Upstream Resources + +Below are some useful resources from the upstream project +documentation. You may find these of value. + +* [Verify Setup Was Successful](https://github.com/ManageIQ/manageiq-pods#verifying-the-setup-was-successful) +* [POD Access And Routes](https://github.com/ManageIQ/manageiq-pods#pod-access-and-routes) +* [Troubleshooting](https://github.com/ManageIQ/manageiq-pods#troubleshooting) + + +# Manual Cleanup + +At this time uninstallation/cleanup is still a manual process. You +will have to follow a few steps to fully remove CFME from your +cluster. + +Delete the project: + +* `oc delete project cfme` + +Delete the PVs: + +* `oc delete pv miq-pv01` +* `oc delete pv miq-pv02` +* `oc delete pv miq-pv03` + +Clean out the old PV data: + +* `cd /exports/` +* `find miq* -type f -delete` +* `find miq* -type d -delete` + +Remove the NFS exports: + +* `rm /etc/exports.d/openshift_cfme.exports` +* `exportfs -ar` + +Delete the user: + +* `oc delete user cfme` + +**NOTE:** The `oc delete project cfme` command will return quickly +however it will continue to operate in the background. Continue +running `oc get project` after you've completed the other steps to +monitor the pods and final project termination progress. diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml new file mode 100644 index 000000000..493e1ef68 --- /dev/null +++ b/roles/openshift_cfme/defaults/main.yml @@ -0,0 +1,38 @@ +--- +# Namespace for the CFME project +openshift_cfme_project: cfme +# Namespace/project description +openshift_cfme_project_description: ManageIQ - CloudForms Management Engine +# Basic user assigned the `admin` role for the project +openshift_cfme_user: cfme +# Project system account for enabling privileged pods +openshift_cfme_service_account: "system:serviceaccount:{{ openshift_cfme_project }}:default" +# All the required exports +openshift_cfme_pv_exports: + - miq-pv01 + - miq-pv02 + - miq-pv03 +# PV template files and their created object names +openshift_cfme_pv_data: + - pv_name: miq-pv01 + pv_template: miq-pv-db.yaml + pv_label: CFME DB PV + - pv_name: miq-pv02 + pv_template: miq-pv-region.yaml + pv_label: CFME Region PV + - pv_name: miq-pv03 + pv_template: miq-pv-server.yaml + pv_label: CFME Server PV + +# Tuning parameter to use more than 5 images at once from an ImageStream +openshift_cfme_maxImagesBulkImportedPerRepository: 100 +# Hostname/IP of the NFS server. Currently defaults to first master +openshift_cfme_nfs_server: "{{ groups.nfs.0 }}" +# TODO: Refactor '_install_app' variable. This is just for testing but +# maybe in the future it should control the entire yes/no for CFME. +# +# Whether or not the manageiq app should be initialized ('oc new-app +# --template=manageiq). If False everything UP TO 'new-app' is ran. +openshift_cfme_install_app: False +# Docker image to pull +openshift_cfme_container_image: "docker.io/manageiq/manageiq-pods:app-latest-fine" diff --git a/roles/openshift_cfme/files/miq-template.yaml b/roles/openshift_cfme/files/miq-template.yaml new file mode 100644 index 000000000..8f0d2af38 --- /dev/null +++ b/roles/openshift_cfme/files/miq-template.yaml @@ -0,0 +1,566 @@ +--- +path: /tmp/miq-template-out +data: + apiVersion: v1 + kind: Template + labels: + template: manageiq + metadata: + name: manageiq + annotations: + description: "ManageIQ appliance with persistent storage" + tags: "instant-app,manageiq,miq" + iconClass: "icon-rails" + objects: + - apiVersion: v1 + kind: Secret + metadata: + name: "${NAME}-secrets" + stringData: + pg-password: "${DATABASE_PASSWORD}" + - apiVersion: v1 + kind: Service + metadata: + annotations: + description: "Exposes and load balances ManageIQ pods" + service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' + name: ${NAME} + spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: https + port: 443 + protocol: TCP + targetPort: 443 + selector: + name: ${NAME} + - apiVersion: v1 + kind: Route + metadata: + name: ${NAME} + spec: + host: ${APPLICATION_DOMAIN} + port: + targetPort: https + tls: + termination: passthrough + to: + kind: Service + name: ${NAME} + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-app + annotations: + description: "Keeps track of the ManageIQ image changes" + spec: + dockerImageRepository: "${APPLICATION_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-postgresql + annotations: + description: "Keeps track of the PostgreSQL image changes" + spec: + dockerImageRepository: "${POSTGRESQL_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-memcached + annotations: + description: "Keeps track of the Memcached image changes" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-${DATABASE_SERVICE_NAME}" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${DATABASE_VOLUME_CAPACITY} + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-region" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} + - apiVersion: apps/v1beta1 + kind: "StatefulSet" + metadata: + name: ${NAME} + annotations: + description: "Defines how to deploy the ManageIQ appliance" + spec: + serviceName: "${NAME}" + replicas: "${APPLICATION_REPLICA_COUNT}" + template: + metadata: + labels: + name: ${NAME} + name: ${NAME} + spec: + containers: + - name: manageiq + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" + livenessProbe: + tcpSocket: + port: 443 + initialDelaySeconds: 480 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: / + port: 443 + scheme: HTTPS + initialDelaySeconds: 200 + timeoutSeconds: 3 + ports: + - containerPort: 80 + protocol: TCP + - containerPort: 443 + protocol: TCP + securityContext: + privileged: true + volumeMounts: + - + name: "${NAME}-server" + mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" + env: + - + name: "APPLICATION_INIT_DELAY" + value: "${APPLICATION_INIT_DELAY}" + - + name: "DATABASE_SERVICE_NAME" + value: "${DATABASE_SERVICE_NAME}" + - + name: "DATABASE_REGION" + value: "${DATABASE_REGION}" + - + name: "MEMCACHED_SERVICE_NAME" + value: "${MEMCACHED_SERVICE_NAME}" + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: "POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" + lifecycle: + preStop: + exec: + command: + - /opt/manageiq/container-scripts/sync-pv-data + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. + # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Exposes the memcached server" + spec: + ports: + - + name: "memcached" + port: 11211 + targetPort: 11211 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Defines how to deploy memcached" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "memcached" + from: + kind: "ImageStreamTag" + name: "miq-memcached:${MEMCACHED_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + template: + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + labels: + name: "${MEMCACHED_SERVICE_NAME}" + spec: + volumes: [] + containers: + - + name: "memcached" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" + ports: + - + containerPort: 11211 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 5 + tcpSocket: + port: 11211 + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 30 + tcpSocket: + port: 11211 + volumeMounts: [] + env: + - + name: "MEMCACHED_MAX_MEMORY" + value: "${MEMCACHED_MAX_MEMORY}" + - + name: "MEMCACHED_MAX_CONNECTIONS" + value: "${MEMCACHED_MAX_CONNECTIONS}" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + value: "${MEMCACHED_SLAB_PAGE_SIZE}" + resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" + limits: + memory: "${MEMCACHED_MEM_LIMIT}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Exposes the database server" + spec: + ports: + - + name: "postgresql" + port: 5432 + targetPort: 5432 + selector: + name: "${DATABASE_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Defines how to deploy the database" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "postgresql" + from: + kind: "ImageStreamTag" + name: "miq-postgresql:${POSTGRESQL_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${DATABASE_SERVICE_NAME}" + template: + metadata: + name: "${DATABASE_SERVICE_NAME}" + labels: + name: "${DATABASE_SERVICE_NAME}" + spec: + volumes: + - + name: "miq-pgdb-volume" + persistentVolumeClaim: + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" + containers: + - + name: "postgresql" + image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" + ports: + - + containerPort: 5432 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 15 + exec: + command: + - "/bin/sh" + - "-i" + - "-c" + - "psql -h 127.0.0.1 -U ${POSTGRESQL_USER} -q -d ${POSTGRESQL_DATABASE} -c 'SELECT 1'" + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 60 + tcpSocket: + port: 5432 + volumeMounts: + - + name: "miq-pgdb-volume" + mountPath: "/var/lib/pgsql/data" + env: + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: "POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" + limits: + memory: "${POSTGRESQL_MEM_LIMIT}" + + parameters: + - + name: "NAME" + displayName: Name + required: true + description: "The name assigned to all of the frontend objects defined in this template." + value: manageiq + - + name: "DATABASE_SERVICE_NAME" + displayName: "PostgreSQL Service Name" + required: true + description: "The name of the OpenShift Service exposed for the PostgreSQL container." + value: "postgresql" + - + name: "DATABASE_USER" + displayName: "PostgreSQL User" + required: true + description: "PostgreSQL user that will access the database." + value: "root" + - + name: "DATABASE_PASSWORD" + displayName: "PostgreSQL Password" + required: true + description: "Password for the PostgreSQL user." + from: "[a-zA-Z0-9]{8}" + generate: expression + - + name: "DATABASE_NAME" + required: true + displayName: "PostgreSQL Database Name" + description: "Name of the PostgreSQL database accessed." + value: "vmdb_production" + - + name: "DATABASE_REGION" + required: true + displayName: "Application Database Region" + description: "Database region that will be used for application." + value: "0" + - + name: "MEMCACHED_SERVICE_NAME" + required: true + displayName: "Memcached Service Name" + description: "The name of the OpenShift Service exposed for the Memcached container." + value: "memcached" + - + name: "MEMCACHED_MAX_MEMORY" + displayName: "Memcached Max Memory" + description: "Memcached maximum memory for memcached object storage in MB." + value: "64" + - + name: "MEMCACHED_MAX_CONNECTIONS" + displayName: "Memcached Max Connections" + description: "Memcached maximum number of connections allowed." + value: "1024" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + displayName: "Memcached Slab Page Size" + description: "Memcached size of each slab page." + value: "1m" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + displayName: "PostgreSQL Max Connections" + description: "PostgreSQL maximum number of database connections allowed." + value: "100" + - + name: "POSTGRESQL_SHARED_BUFFERS" + displayName: "PostgreSQL Shared Buffer Amount" + description: "Amount of memory dedicated for PostgreSQL shared memory buffers." + value: "256MB" + - + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" + required: true + description: "Minimum amount of memory the Application container will need." + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." + value: "1024Mi" + - + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" + required: true + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" + - + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" + required: true + description: "Maximum amount of memory the Application container can consume." + value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." + value: "256Mi" + - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "POSTGRESQL_IMG_TAG" + displayName: "PostgreSQL Image Tag" + description: "This is the PostgreSQL image tag/version requested to deploy." + value: "postgresql-latest-fine" + - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "MEMCACHED_IMG_TAG" + displayName: "Memcached Image Tag" + description: "This is the Memcached image tag/version requested to deploy." + value: "memcached-latest-fine" + - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "APPLICATION_IMG_TAG" + displayName: "Application Image Tag" + description: "This is the Application image tag/version requested to deploy." + value: "app-latest-fine" + - + name: "APPLICATION_DOMAIN" + displayName: "Application Hostname" + description: "The exposed hostname that will route to the application service, if left blank a value will be defaulted." + value: "" + - + name: "APPLICATION_REPLICA_COUNT" + displayName: "Application Replica Count" + description: "This is the number of Application replicas requested to deploy." + value: "1" + - + name: "APPLICATION_INIT_DELAY" + displayName: "Application Init Delay" + required: true + description: "Delay in seconds before we attempt to initialize the application." + value: "15" + - + name: "APPLICATION_VOLUME_CAPACITY" + displayName: "Application Volume Capacity" + required: true + description: "Volume space available for application data." + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" + - + name: "DATABASE_VOLUME_CAPACITY" + displayName: "Database Volume Capacity" + required: true + description: "Volume space available for database." + value: "15Gi" diff --git a/roles/openshift_cfme/files/openshift_cfme.exports b/roles/openshift_cfme/files/openshift_cfme.exports new file mode 100644 index 000000000..5457d41fc --- /dev/null +++ b/roles/openshift_cfme/files/openshift_cfme.exports @@ -0,0 +1,3 @@ +/exports/miq-pv01 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv02 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv03 *(rw,no_root_squash,no_wdelay) diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml new file mode 100644 index 000000000..476a5e030 --- /dev/null +++ b/roles/openshift_cfme/handlers/main.yml @@ -0,0 +1,42 @@ +--- +###################################################################### +# NOTE: These are duplicated from roles/openshift_master/handlers/main.yml +# +# TODO: Use the consolidated 'openshift_handlers' role once it's ready +# See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782 +###################################################################### + +- name: restart master + systemd: name={{ openshift.common.service_type }}-master state=restarted + when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool)) + notify: Verify API Server + +- name: restart master api + systemd: name={{ openshift.common.service_type }}-master-api state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + notify: Verify API Server + +- name: restart master controllers + systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + +- name: Verify API Server + # Using curl here since the uri module requires python-httplib2 and + # wait_for port doesn't provide health information. + command: > + curl --silent --tlsv1.2 + {% if openshift.common.version_gte_3_2_or_1_2 | bool %} + --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt + {% else %} + --cacert {{ openshift.common.config_base }}/master/ca.crt + {% endif %} + {{ openshift.master.api_url }}/healthz/ready + args: + # Disables the following warning: + # Consider using get_url or uri module rather than running curl + warn: no + register: api_available_output + until: api_available_output.stdout == 'ok' + retries: 120 + delay: 1 + changed_when: false diff --git a/roles/openshift_cfme/img/CFMEBasicDeployment.png b/roles/openshift_cfme/img/CFMEBasicDeployment.png Binary files differnew file mode 100644 index 000000000..a89c1e325 --- /dev/null +++ b/roles/openshift_cfme/img/CFMEBasicDeployment.png diff --git a/roles/openshift_cfme/meta/main.yml b/roles/openshift_cfme/meta/main.yml new file mode 100644 index 000000000..9200f2c3c --- /dev/null +++ b/roles/openshift_cfme/meta/main.yml @@ -0,0 +1,20 @@ +--- +galaxy_info: + author: Tim Bielawa + description: OpenShift CFME (Manage IQ) Deployer + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.2 + version: 1.0 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- role: lib_openshift +- role: lib_utils +- role: openshift_common +- role: openshift_master_facts diff --git a/roles/openshift_cfme/tasks/create_pvs.yml b/roles/openshift_cfme/tasks/create_pvs.yml new file mode 100644 index 000000000..7fa7d3997 --- /dev/null +++ b/roles/openshift_cfme/tasks/create_pvs.yml @@ -0,0 +1,36 @@ +--- +# Check for existance and then conditionally: +# - evaluate templates +# - PVs +# +# These tasks idempotently create required CFME PV objects. Do not +# call this file directly. This file is intended to be ran as an +# include that has a 'with_items' attached to it. Hence the use below +# of variables like "{{ item.pv_label }}" + +- name: "Check if the {{ item.pv_label }} template has been created already" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: pv + name: "{{ item.pv_name }}" + register: miq_pv_check + +# Skip all of this if the PV already exists +- block: + - name: "Ensure the {{ item.pv_label }} template is evaluated" + template: + src: "{{ item.pv_template }}.j2" + dest: "{{ template_dir }}/{{ item.pv_template }}" + + - name: "Ensure {{ item.pv_label }} is created" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: pv + name: "{{ item.pv_name }}" + state: present + delete_after: True + files: + - "{{ template_dir }}/{{ item.pv_template }}" + when: + - not miq_pv_check.results.results.0 diff --git a/roles/openshift_cfme/tasks/main.yml b/roles/openshift_cfme/tasks/main.yml new file mode 100644 index 000000000..acbce7232 --- /dev/null +++ b/roles/openshift_cfme/tasks/main.yml @@ -0,0 +1,148 @@ +--- +###################################################################### +# Users, projects, and privileges + +- name: Ensure the CFME user exists + oc_user: + state: present + username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace exists with CFME user as admin + oc_project: + state: present + name: "{{ openshift_cfme_project }}" + display_name: "{{ openshift_cfme_project_description }}" + admin: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace service account is privileged + oc_adm_policy_user: + namespace: "{{ openshift_cfme_project }}" + user: "{{ openshift_cfme_service_account }}" + resource_kind: scc + resource_name: privileged + state: present + +###################################################################### +# NFS + +- name: Ensure the /exports/ directory exists + file: + path: /exports/ + state: directory + mode: 0755 + owner: root + group: root + +- name: Ensure the miq-pv0X export directories exist + file: + path: "/exports/{{ item }}" + state: directory + mode: 0775 + owner: root + group: root + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the NFS exports for CFME PVs exist + copy: + src: openshift_cfme.exports + dest: /etc/exports.d/openshift_cfme.exports + register: nfs_exports_updated + +- name: Ensure the NFS export table is refreshed if exports were added + command: exportfs -ar + when: + - nfs_exports_updated.changed + + +###################################################################### +# Create the required CFME PVs. Check out these online docs if you +# need a refresher on includes looping with items: +# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0 +# * http://stackoverflow.com/a/35128533 +# +# TODO: Handle the case where a PV template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- include: create_pvs.yml + with_items: "{{ openshift_cfme_pv_data }}" + +###################################################################### +# CFME App Template +# +# Note, this is different from the create_pvs.yml tasks in that the +# application template does not require any jinja2 evaluation. +# +# TODO: Handle the case where the server template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- name: Check if the CFME Server template has been created already + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: template + name: manageiq + register: miq_server_check + +- name: Copy over CFME Server template + copy: + src: miq-template.yaml + dest: "{{ template_dir }}/miq-template.yaml" + +- name: Ensure the server template was read from disk + debug: + var=r_openshift_cfme_miq_template_content + +- name: Ensure CFME Server Template exists + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: template + name: "manageiq" + state: present + content: "{{ r_openshift_cfme_miq_template_content }}" + +###################################################################### +# Let's do this + +- name: Ensure the CFME Server is created + oc_process: + namespace: "{{ openshift_cfme_project }}" + template_name: manageiq + create: True + register: cfme_new_app_process + run_once: True + when: + # User said to install CFME in their inventory + - openshift_cfme_install_app | bool + # # The server app doesn't exist already + # - not miq_server_check.results.results.0 + +- debug: + var: cfme_new_app_process + +###################################################################### +# Various cleanup steps + +# TODO: Not sure what to do about this right now. Might be able to +# just delete it? This currently warns about "Unable to find +# '<TEMP_DIR>' in expected paths." +- name: Ensure the temporary PV/App templates are erased + file: + path: "{{ item }}" + state: absent + with_fileglob: + - "{{ template_dir }}/*.yaml" + +- name: Ensure the temporary PV/app template directory is erased + file: + path: "{{ template_dir }}" + state: absent + +###################################################################### + +- name: Status update + debug: + msg: > + CFME has been deployed. Note that there will be a delay before + it is fully initialized. diff --git a/roles/openshift_cfme/tasks/tune_masters.yml b/roles/openshift_cfme/tasks/tune_masters.yml new file mode 100644 index 000000000..02b0f10bf --- /dev/null +++ b/roles/openshift_cfme/tasks/tune_masters.yml @@ -0,0 +1,12 @@ +--- +- name: Ensure bulk image import limit is tuned + yedit: + src: /etc/origin/master/master-config.yaml + key: 'imagePolicyConfig.maxImagesBulkImportedPerRepository' + value: "{{ openshift_cfme_maxImagesBulkImportedPerRepository | int() }}" + state: present + backup: True + notify: + - restart master + +- meta: flush_handlers diff --git a/roles/openshift_cfme/tasks/uninstall.yml b/roles/openshift_cfme/tasks/uninstall.yml new file mode 100644 index 000000000..cba734a0e --- /dev/null +++ b/roles/openshift_cfme/tasks/uninstall.yml @@ -0,0 +1,43 @@ +--- +- include_role: + name: lib_openshift + +- name: Uninstall CFME - ManageIQ + debug: + msg: Uninstalling Cloudforms Management Engine - ManageIQ + +- name: Ensure the CFME project is removed + oc_project: + state: absent + name: "{{ openshift_cfme_project }}" + +- name: Ensure the CFME template is removed + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: absent + kind: template + name: manageiq + +- name: Ensure the CFME PVs are removed + oc_obj: + state: absent + all_namespaces: True + kind: pv + name: "{{ item }}" + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the CFME user is removed + oc_user: + state: absent + username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME NFS Exports are removed + file: + path: /etc/exports.d/openshift_cfme.exports + state: absent + register: nfs_exports_removed + +- name: Ensure the NFS export table is refreshed if exports were removed + command: exportfs -ar + when: + - nfs_exports_removed.changed diff --git a/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 new file mode 100644 index 000000000..b8c3bb277 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv01 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 new file mode 100644 index 000000000..7218773f0 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv02 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 new file mode 100644 index 000000000..7b40b6c69 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv03 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml index 4f25a9c8f..982bd9530 100644 --- a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml @@ -48,7 +48,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-app - apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -188,7 +188,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms memcached image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-memcached - apiVersion: v1 kind: "DeploymentConfig" metadata: @@ -272,7 +272,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms postgresql image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-postgresql - apiVersion: v1 kind: "DeploymentConfig" metadata: diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml deleted file mode 100644 index 14bdd1dca..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: cloudforms -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes-app - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml new file mode 100644 index 000000000..250a99b8d --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv01 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml deleted file mode 100644 index 709d8d976..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: nfs-pv01 -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml new file mode 100644 index 000000000..cba9bbe35 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv02 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml new file mode 100644 index 000000000..c08c21265 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv03 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml index 4f25a9c8f..3bc6c5813 100644 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml @@ -17,6 +17,7 @@ objects: service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' name: ${NAME} spec: + clusterIP: None ports: - name: http port: 80 @@ -48,11 +49,27 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: "${APPLICATION_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-postgresql + annotations: + description: "Keeps track of changes in the CloudForms postgresql image" + spec: + dockerImageRepository: "${POSTGRESQL_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-memcached + annotations: + description: "Keeps track of changes in the CloudForms memcached image" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${DATABASE_SERVICE_NAME} + name: "${NAME}-${DATABASE_SERVICE_NAME}" spec: accessModes: - ReadWriteOnce @@ -62,45 +79,41 @@ objects: - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${NAME} + name: "${NAME}-region" spec: accessModes: - ReadWriteOnce resources: requests: - storage: ${APPLICATION_VOLUME_CAPACITY} -- apiVersion: v1 - kind: "DeploymentConfig" + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} +- apiVersion: apps/v1beta1 + kind: "StatefulSet" metadata: name: ${NAME} annotations: description: "Defines how to deploy the CloudForms appliance" spec: + serviceName: "${NAME}" + replicas: 1 template: metadata: labels: name: ${NAME} name: ${NAME} spec: - volumes: - - - name: "cfme-app-volume" - persistentVolumeClaim: - claimName: ${NAME} containers: - - image: cloudforms/cfme-openshift-app:${APPLICATION_IMG_TAG} - imagePullPolicy: IfNotPresent - name: cloudforms + - name: cloudforms + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" livenessProbe: - httpGet: - path: / - port: 80 + tcpSocket: + port: 443 initialDelaySeconds: 480 timeoutSeconds: 3 readinessProbe: httpGet: path: / - port: 80 + port: 443 + scheme: HTTPS initialDelaySeconds: 200 timeoutSeconds: 3 ports: @@ -112,8 +125,11 @@ objects: privileged: true volumeMounts: - - name: "cfme-app-volume" + name: "${NAME}-server" mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" env: - name: "APPLICATION_INIT_DELAY" @@ -144,29 +160,32 @@ objects: value: "${POSTGRESQL_SHARED_BUFFERS}" resources: requests: - memory: "${MEMORY_APPLICATION_MIN}" + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" lifecycle: preStop: exec: command: - /opt/rh/cfme-container-scripts/sync-pv-data - replicas: 1 - selector: - name: ${NAME} - triggers: - - type: "ConfigChange" - - type: "ImageChange" - imageChangeParams: - automatic: true - containerNames: - - "cloudforms" - from: - kind: "ImageStreamTag" - name: "cfme-openshift-app:${APPLICATION_IMG_TAG}" - strategy: - type: "Recreate" - recreateParams: - timeoutSeconds: 1200 + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. + # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" - apiVersion: v1 kind: "Service" metadata: @@ -182,14 +201,6 @@ objects: selector: name: "${MEMCACHED_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-memcached - annotations: - description: "Keeps track of changes in the CloudForms memcached image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${MEMCACHED_SERVICE_NAME}" @@ -223,7 +234,7 @@ objects: containers: - name: "memcached" - image: "cloudforms/cfme-openshift-memcached:${MEMCACHED_IMG_TAG}" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" ports: - containerPort: 11211 @@ -249,8 +260,11 @@ objects: name: "MEMCACHED_SLAB_PAGE_SIZE" value: "${MEMCACHED_SLAB_PAGE_SIZE}" resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" limits: - memory: "${MEMORY_MEMCACHED_LIMIT}" + memory: "${MEMCACHED_MEM_LIMIT}" - apiVersion: v1 kind: "Service" metadata: @@ -266,14 +280,6 @@ objects: selector: name: "${DATABASE_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-postgresql - annotations: - description: "Keeps track of changes in the CloudForms postgresql image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${DATABASE_SERVICE_NAME}" @@ -307,11 +313,11 @@ objects: - name: "cfme-pgdb-volume" persistentVolumeClaim: - claimName: ${DATABASE_SERVICE_NAME} + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" containers: - name: "postgresql" - image: "cloudforms/cfme-openshift-postgresql:${POSTGRESQL_IMG_TAG}" + image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" ports: - containerPort: 5432 @@ -350,8 +356,11 @@ objects: name: "POSTGRESQL_SHARED_BUFFERS" value: "${POSTGRESQL_SHARED_BUFFERS}" resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" limits: - memory: "${MEMORY_POSTGRESQL_LIMIT}" + memory: "${POSTGRESQL_MEM_LIMIT}" parameters: - @@ -420,36 +429,87 @@ parameters: name: "POSTGRESQL_SHARED_BUFFERS" displayName: "PostgreSQL Shared Buffer Amount" description: "Amount of memory dedicated for PostgreSQL shared memory buffers." - value: "64MB" + value: "256MB" - - name: "MEMORY_APPLICATION_MIN" - displayName: "Application Memory Minimum" + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" required: true description: "Minimum amount of memory the Application container will need." - value: "4096Mi" + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." + value: "1024Mi" - - name: "MEMORY_POSTGRESQL_LIMIT" - displayName: "PostgreSQL Memory Limit" + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" required: true - description: "Maximum amount of memory the PostgreSQL container can use." - value: "2048Mi" + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" - - name: "MEMORY_MEMCACHED_LIMIT" - displayName: "Memcached Memory Limit" + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" required: true - description: "Maximum amount of memory the Memcached container can use." + description: "Maximum amount of memory the Application container can consume." + value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." value: "256Mi" - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql" + - name: "POSTGRESQL_IMG_TAG" displayName: "PostgreSQL Image Tag" description: "This is the PostgreSQL image tag/version requested to deploy." value: "latest" - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-memcached" + - name: "MEMCACHED_IMG_TAG" displayName: "Memcached Image Tag" description: "This is the Memcached image tag/version requested to deploy." value: "latest" - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-app" + - name: "APPLICATION_IMG_TAG" displayName: "Application Image Tag" description: "This is the Application image tag/version requested to deploy." @@ -464,16 +524,22 @@ parameters: displayName: "Application Init Delay" required: true description: "Delay in seconds before we attempt to initialize the application." - value: "30" + value: "15" - name: "APPLICATION_VOLUME_CAPACITY" displayName: "Application Volume Capacity" required: true description: "Volume space available for application data." - value: "1Gi" + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" - name: "DATABASE_VOLUME_CAPACITY" displayName: "Database Volume Capacity" required: true description: "Volume space available for database." - value: "1Gi" + value: "15Gi" diff --git a/roles/openshift_excluder/tasks/install.yml b/roles/openshift_excluder/tasks/install.yml index d09358bee..3a866cedf 100644 --- a/roles/openshift_excluder/tasks/install.yml +++ b/roles/openshift_excluder/tasks/install.yml @@ -1,14 +1,24 @@ --- -- name: Install docker excluder - package: - name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_docker_package_state }}" - when: - - r_openshift_excluder_enable_docker_excluder | bool - -- name: Install openshift excluder - package: - name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_package_state }}" - when: - - r_openshift_excluder_enable_openshift_excluder | bool + +- when: + - not openshift.common.is_atomic | bool + - r_openshift_excluder_install_ran is not defined + + block: + + - name: Install docker excluder + package: + name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_docker_package_state }}" + when: + - r_openshift_excluder_enable_docker_excluder | bool + + - name: Install openshift excluder + package: + name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_package_state }}" + when: + - r_openshift_excluder_enable_openshift_excluder | bool + + - set_fact: + r_openshift_excluder_install_ran: True diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 1b9bda67e..50ed3e964 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -24,12 +24,18 @@ msg: | openshift-ansible requires Python 3 for {{ ansible_distribution }}; For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html - when: ansible_distribution == 'Fedora' and ansible_python['version']['major'] != 3 + when: + - ansible_distribution == 'Fedora' + - ansible_python['version']['major'] != 3 + - r_openshift_facts_ran is not defined - name: Validate python version fail: msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" - when: ansible_distribution != 'Fedora' and ansible_python['version']['major'] != 2 + when: + - ansible_distribution != 'Fedora' + - ansible_python['version']['major'] != 2 + - r_openshift_facts_ran is not defined # Fail as early as possible if Atomic and old version of Docker - block: @@ -48,7 +54,9 @@ that: - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') - when: l_is_atomic | bool + when: + - l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Load variables include_vars: "{{ item }}" @@ -59,7 +67,9 @@ - name: Ensure various deps are installed package: name={{ item }} state=present with_items: "{{ required_packages }}" - when: not l_is_atomic | bool + when: + - not l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Ensure various deps for running system containers are installed package: name={{ item }} state=present @@ -67,6 +77,7 @@ when: - not l_is_atomic | bool - l_any_system_container | bool + - r_openshift_facts_ran is not defined - name: Gather Cluster facts and set is_containerized if needed openshift_facts: @@ -99,3 +110,7 @@ - name: Set repoquery command set_fact: repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" + +- name: Register that this already ran + set_fact: + r_openshift_facts_ran: True diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..60aacf715 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. @@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,55 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. + """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + image_tag = get_var(task_vars, "openshift_image_tag") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -124,7 +132,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) if result.get("failed", False): return False @@ -132,6 +141,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -147,17 +157,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" + + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + for registry in registries: + args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} + result = self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not result.get("failed"): + return True - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..2bd615457 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -34,7 +34,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars) + info = self.execute_module("docker_info", {}, task_vars=task_vars) if info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -146,7 +146,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: - pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") - result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) msg = result.get("msg", "") if result.get("failed"): if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..0dd2b1286 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - "cockpit-kubernetes", - "cockpit-shell", + "cockpit-system", "cockpit-ws", "etcd", "httpd-tools", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version. diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -177,8 +177,85 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. + 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..876614b1d 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -77,7 +77,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name == "yum": return {} if module_name != "docker_info": @@ -187,7 +187,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 751489958..d895e9a68 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,6 +124,35 @@ edits: "{{ openshift_hosted_registry_edits }}" force: "{{ True|bool in openshift_hosted_registry_force }}" +- name: Ensure OpenShift registry correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + failed_when: false + +- name: Determine the latest version of the OpenShift registry deployment + command: | + oc get deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_registry_latest_version + +- name: Sanity-check that the OpenShift registry rolled out correctly + command: | + oc get replicationcontroller {{ openshift_hosted_registry_name }}-{{ openshift_hosted_registry_latest_version.stdout }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_registry_rc_phase + until: "'Running' not in openshift_hosted_registry_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_registry_rc_phase.stdout" + - include: storage/glusterfs.yml when: - openshift.hosted.registry.storage.kind | default(none) == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 192afc87a..160ae2f5e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -55,7 +55,7 @@ state: present with_items: "{{ openshift_hosted_routers }}" -- name: Grant the router serivce account(s) access to the appropriate scc +- name: Grant the router service account(s) access to the appropriate scc oc_adm_policy_user: user: "system:serviceaccount:{{ item.namespace }}:{{ item.serviceaccount }}" namespace: "{{ item.namespace }}" @@ -89,18 +89,37 @@ ports: "{{ item.ports }}" stats_port: "{{ item.stats_port }}" with_items: "{{ openshift_hosted_routers }}" - register: routerout -# This should probably move to module -- name: wait for deploy - pause: - seconds: 30 - when: routerout.changed +- name: Ensure OpenShift router correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace | default('default') }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + with_items: "{{ openshift_hosted_routers }}" + failed_when: false -- name: Ensure router replica count matches desired - oc_scale: - kind: dc - name: "{{ item.name | default('router') }}" - namespace: "{{ item.namespace | default('default') }}" - replicas: "{{ item.replicas }}" +- name: Determine the latest version of the OpenShift router deployment + command: | + oc get deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_routers_latest_version with_items: "{{ openshift_hosted_routers }}" + +- name: Poll for OpenShift router deployment success + command: | + oc get replicationcontroller {{ item.0.name }}-{{ item.1.stdout }} \ + --namespace {{ item.0.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_router_rc_phase + until: "'Running' not in openshift_hosted_router_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_router_rc_phase.stdout" + with_together: + - "{{ openshift_hosted_routers }}" + - "{{ openshift_hosted_routers_latest_version.results }}" diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 0c60ef6fd..dd0f22d4b 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -55,6 +55,9 @@ When both `openshift_logging_install_logging` and `openshift_logging_upgrade_log - `openshift_logging_fluentd_use_journal`: NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver when using the default of empty. - `openshift_logging_fluentd_journal_read_from_head`: If empty, Fluentd will use its internal default, which is false. - `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all']. +- `openshift_logging_fluentd_buffer_queue_limit`: Buffer queue limit for Fluentd. Defaults to 1024. +- `openshift_logging_fluentd_buffer_size_limit`: Buffer chunk limit for Fluentd. Defaults to 1m. + - `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'. - `openshift_logging_es_port`: The port for the ES service Fluentd should sent its logs to. Defaults to '9200'. @@ -155,3 +158,5 @@ Elasticsearch OPS too, if using an OPS cluster: - `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to create for _external_ mux clients to associate with their logs - users will need to set this +- `openshift_logging_mux_buffer_queue_limit`: Default `[1024]` - Buffer queue limit for Mux. +- `openshift_logging_mux_buffer_size_limit`: Default `[1m]` - Buffer chunk limit for Mux. diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 3c343c9dc..66d880d23 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -76,6 +76,8 @@ openshift_logging_fluentd_use_journal: "{{ openshift_hosted_logging_use_journal openshift_logging_fluentd_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}" openshift_logging_fluentd_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}" openshift_logging_fluentd_hosts: ['--all'] +openshift_logging_fluentd_buffer_queue_limit: 1024 +openshift_logging_fluentd_buffer_size_limit: 1m openshift_logging_es_host: logging-es openshift_logging_es_port: 9200 diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 7c1062b77..66dc0e096 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -119,6 +119,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_together: - "{{ openshift_logging_facts.elasticsearch_ops.deploymentconfigs }}" @@ -141,6 +147,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }} when: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index f1d15b76d..684dbe0a0 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -269,6 +269,75 @@ - "{{ tempdir }}/templates/logging-es-dc.yml" delete_after: true +- name: Retrieving the cert to use when generating secrets for the {{ es_component }} component + slurp: + src: "{{ generated_certs_dir }}/{{ item.file }}" + register: key_pairs + with_items: + - { name: "ca_file", file: "ca.crt" } + - { name: "es_key", file: "system.logging.es.key" } + - { name: "es_cert", file: "system.logging.es.crt" } + when: openshift_logging_es_allow_external | bool + +- set_fact: + es_key: "{{ lookup('file', openshift_logging_es_key) | b64encode }}" + when: + - openshift_logging_es_key | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_cert: "{{ lookup('file', openshift_logging_es_cert) | b64encode }}" + when: + - openshift_logging_es_cert | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ lookup('file', openshift_logging_es_ca_ext) | b64encode }}" + when: + - openshift_logging_es_ca_ext | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ key_pairs | entry_from_named_pair('ca_file') }}" + when: + - es_ca is not defined + - openshift_logging_es_allow_external | bool + changed_when: false + +- name: Generating Elasticsearch {{ es_component }} route template + template: + src: route_reencrypt.j2 + dest: "{{mktemp.stdout}}/templates/logging-{{ es_component }}-route.yaml" + vars: + obj_name: "logging-{{ es_component }}" + route_host: "{{ openshift_logging_es_hostname }}" + service_name: "logging-{{ es_component }}" + tls_key: "{{ es_key | default('') | b64decode }}" + tls_cert: "{{ es_cert | default('') | b64decode }}" + tls_ca_cert: "{{ es_ca | b64decode }}" + tls_dest_ca_cert: "{{ key_pairs | entry_from_named_pair('ca_file') | b64decode }}" + edge_term_policy: "{{ openshift_logging_es_edge_term_policy | default('') }}" + labels: + component: support + logging-infra: support + provider: openshift + changed_when: no + when: openshift_logging_es_allow_external | bool + +# This currently has an issue if the host name changes +- name: Setting Elasticsearch {{ es_component }} route + oc_obj: + state: present + name: "logging-{{ es_component }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + kind: route + files: + - "{{ tempdir }}/templates/logging-{{ es_component }}-route.yaml" + when: openshift_logging_es_allow_external | bool + ## Placeholder for migration when necessary ## - name: Delete temp directory diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 index 377abe21f..38948ba2f 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 @@ -35,6 +35,12 @@ appender: layout: type: consolePattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false file: type: dailyRollingFile @@ -43,6 +49,12 @@ appender: layout: type: pattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files. # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 index 58c325c8a..409e564c2 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 @@ -16,6 +16,7 @@ index: node: master: ${IS_MASTER} data: ${HAS_DATA} + max_local_storage_nodes: 1 network: host: 0.0.0.0 diff --git a/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 new file mode 100644 index 000000000..cf8a9e65f --- /dev/null +++ b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 @@ -0,0 +1,36 @@ +apiVersion: "v1" +kind: "Route" +metadata: + name: "{{obj_name}}" +{% if labels is defined%} + labels: +{% for key, value in labels.iteritems() %} + {{key}}: {{value}} +{% endfor %} +{% endif %} +spec: + host: {{ route_host }} + tls: +{% if tls_key is defined and tls_key | length > 0 %} + key: | +{{ tls_key|indent(6, true) }} +{% if tls_cert is defined and tls_cert | length > 0 %} + certificate: | +{{ tls_cert|indent(6, true) }} +{% endif %} +{% endif %} + caCertificate: | +{% for line in tls_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + destinationCACertificate: | +{% for line in tls_dest_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + termination: reencrypt +{% if edge_term_policy is defined and edge_term_policy | length > 0 %} + insecureEdgeTerminationPolicy: {{ edge_term_policy }} +{% endif %} + to: + kind: Service + name: {{ service_name }} diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index e185938e3..a5695ee26 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -93,6 +93,14 @@ spec: value: "{{ openshift_logging_fluentd_journal_source | default('') }}" - name: "JOURNAL_READ_FROM_HEAD" value: "{{ openshift_logging_fluentd_journal_read_from_head | lower }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_size_limit }}" + - name: "FLUENTD_CPU_LIMIT" + value: "{{ openshift_logging_fluentd_cpu_limit }}" + - name: "FLUENTD_MEMORY_LIMIT" + value: "{{ openshift_logging_fluentd_memory_limit }}" volumes: - name: runlogjournal hostPath: diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 10fa4372c..77e47d38c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -10,7 +10,9 @@ openshift_logging_mux_namespace: logging ### Common settings openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}" openshift_logging_mux_cpu_limit: 500m -openshift_logging_mux_memory_limit: 1Gi +openshift_logging_mux_memory_limit: 2Gi +openshift_logging_mux_buffer_queue_limit: 1024 +openshift_logging_mux_buffer_size_limit: 1m openshift_logging_mux_replicas: 1 diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 502cd3347..243698c6a 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -103,6 +103,14 @@ spec: value: "true" - name: MUX_ALLOW_EXTERNAL value: "{{ openshift_logging_mux_allow_external | default('false') }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_mux_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_mux_buffer_size_limit }}" + - name: "MUX_CPU_LIMIT" + value: "{{ openshift_logging_mux_cpu_limit }}" + - name: "MUX_MEMORY_LIMIT" + value: "{{ openshift_logging_mux_memory_limit }}" volumes: - name: config configMap: diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml index 14a1daf6c..6a082d71a 100644 --- a/roles/openshift_master/defaults/main.yml +++ b/roles/openshift_master/defaults/main.yml @@ -1,4 +1,3 @@ --- openshift_node_ips: [] -# TODO: update setting these values based on the facts -#openshift_version: "{{ openshift_pkg_version | default(openshift_image_tag | default(openshift.docker.openshift_image_tag | default(''))) }}" +r_openshift_master_clean_install: false diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index aed5598c0..035c15fef 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -164,6 +164,26 @@ - restart master api - restart master controllers +- name: Configure master to use etcd3 storage backend on 3.6 clean installs + yedit: + src: /etc/origin/master/master-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: kubernetesMasterConfig.apiServerArguments.storage-backend + value: + - etcd3 + - key: kubernetesMasterConfig.apiServerArguments.storage-media-type + value: + - application/vnd.kubernetes.protobuf + when: + - r_openshift_master_clean_install + - openshift.common.version_gte_3_6 + notify: + - restart master + - restart master api + - restart master controllers + - include: set_loopback_context.yml when: openshift.common.version_gte_3_2_or_1_2 diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md index 84503217b..1f10de4a2 100644 --- a/roles/openshift_metrics/README.md +++ b/roles/openshift_metrics/README.md @@ -68,6 +68,9 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml). - `openshift_metrics_resolution`: How often metrics should be gathered. +- `openshift_metrics_install_hawkular_agent`: Install the Hawkular OpenShift Agent (HOSA). HOSA can be used + to collect custom metrics from your pods. This component is currently in tech-preview and is not installed by default. + ## Additional variables to control resource limits Each metrics component (hawkular, cassandra, heapster) can specify a cpu and memory limits and requests by setting the corresponding role variable: diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml index 467db34c8..ba50566e9 100644 --- a/roles/openshift_metrics/defaults/main.yaml +++ b/roles/openshift_metrics/defaults/main.yaml @@ -31,6 +31,14 @@ openshift_metrics_heapster_requests_memory: 0.9375G openshift_metrics_heapster_requests_cpu: null openshift_metrics_heapster_nodeselector: "" +openshift_metrics_install_hawkular_agent: False +openshift_metrics_hawkular_agent_limits_memory: null +openshift_metrics_hawkular_agent_limits_cpu: null +openshift_metrics_hawkular_agent_requests_memory: null +openshift_metrics_hawkular_agent_requests_cpu: null +openshift_metrics_hawkular_agent_nodeselector: "" +openshift_metrics_hawkular_agent_namespace: "default" + openshift_metrics_hawkular_hostname: "hawkular-metrics.{{openshift_master_default_subdomain}}" openshift_metrics_duration: 7 diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index fb4fe2f03..7b81b3c10 100644 --- a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -73,6 +73,8 @@ {{ hawkular_secrets['hawkular-metrics.key'] }} tls.truststore.crt: > {{ hawkular_secrets['hawkular-cassandra.crt'] }} + ca.crt: > + {{ hawkular_secrets['ca.crt'] }} when: name not in metrics_secrets.stdout_lines changed_when: no diff --git a/roles/openshift_metrics/tasks/install_hosa.yaml b/roles/openshift_metrics/tasks/install_hosa.yaml new file mode 100644 index 000000000..cc533a68b --- /dev/null +++ b/roles/openshift_metrics/tasks/install_hosa.yaml @@ -0,0 +1,44 @@ +--- +- name: Generate Hawkular Agent (HOSA) Cluster Role + template: + src: hawkular_openshift_agent_role.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-role.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Service Account + template: + src: hawkular_openshift_agent_sa.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-sa.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Daemon Set + template: + src: hawkular_openshift_agent_ds.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-ds.yaml" + vars: + node_selector: "{{openshift_metrics_hawkular_agent_nodeselector | default('') }}" + changed_when: no + +- name: Generate the Hawkular Agent (HOSA) Configmap + template: + src: hawkular_openshift_agent_cm.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-cm.yaml" + changed_when: no + +- name: Generate role binding for the hawkular-openshift-agent service account + template: + src: rolebinding.j2 + dest: "{{ mktemp.stdout }}/templates/metrics-hawkular-agent-rolebinding.yaml" + vars: + cluster: True + obj_name: hawkular-openshift-agent-rb + labels: + metrics-infra: hawkular-agent + roleRef: + kind: ClusterRole + name: hawkular-openshift-agent + subjects: + - kind: ServiceAccount + name: hawkular-openshift-agent + namespace: "{{openshift_metrics_hawkular_agent_namespace}}" + changed_when: no diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 74eb56713..fdf4ae57f 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -16,11 +16,19 @@ include: install_heapster.yaml when: openshift_metrics_heapster_standalone | bool -- find: paths={{ mktemp.stdout }}/templates patterns=*.yaml +- name: Install Hawkular OpenShift Agent (HOSA) + include: install_hosa.yaml + when: openshift_metrics_install_hawkular_agent | default(false) | bool + +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^(?!metrics-hawkular-openshift-agent).*.yaml" + use_regex: true register: object_def_files changed_when: no -- slurp: src={{item.path}} +- slurp: + src: "{{item.path}}" register: object_defs with_items: "{{object_def_files.files}}" changed_when: no @@ -34,6 +42,31 @@ file_content: "{{ item.content | b64decode | from_yaml }}" with_items: "{{ object_defs.results }}" +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^metrics-hawkular-openshift-agent.*.yaml" + use_regex: true + register: hawkular_agent_object_def_files + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- slurp: + src: "{{item.path}}" + register: hawkular_agent_object_defs + with_items: "{{ hawkular_agent_object_def_files.files }}" + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- name: Create Hawkular Agent objects + include: oc_apply.yaml + vars: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + namespace: "{{ openshift_metrics_hawkular_agent_namespace }}" + file_name: "{{ item.source }}" + file_content: "{{ item.content | b64decode | from_yaml }}" + with_items: "{{ hawkular_agent_object_defs.results }}" + when: openshift_metrics_install_hawkular_agent | bool + - include: update_master_config.yaml - command: > diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 5d8506a73..0b5f23c24 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -44,6 +44,9 @@ - include: "{{ (openshift_metrics_install_metrics | bool) | ternary('install_metrics.yaml','uninstall_metrics.yaml') }}" +- include: uninstall_hosa.yaml + when: not openshift_metrics_install_hawkular_agent | bool + - name: Delete temp directory local_action: file path=local_tmp.stdout state=absent tags: metrics_cleanup diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index dd67703b4..1e1af40e8 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -14,7 +14,7 @@ command: > {{ openshift.common.client_binary }} --config={{ kubeconfig }} apply -f {{ file_name }} - -n {{ openshift_metrics_project }} + -n {{namespace}} register: generation_apply failed_when: "'error' in generation_apply.stderr" changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_hosa.yaml b/roles/openshift_metrics/tasks/uninstall_hosa.yaml new file mode 100644 index 000000000..42ed02460 --- /dev/null +++ b/roles/openshift_metrics/tasks/uninstall_hosa.yaml @@ -0,0 +1,15 @@ +--- +- name: remove Hawkular Agent (HOSA) components + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found --selector=metrics-infra=agent + all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings + register: delete_metrics + changed_when: delete_metrics.stdout != 'No resources found' + +- name: remove rolebindings + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found + clusterrolebinding/hawkular-openshift-agent-rb + changed_when: delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 new file mode 100644 index 000000000..bf472c066 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 @@ -0,0 +1,54 @@ +id: hawkular-openshift-agent +kind: ConfigMap +apiVersion: v1 +name: Hawkular OpenShift Agent Configuration +metadata: + name: hawkular-openshift-agent-configuration + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +data: + config.yaml: | + kubernetes: + tenant: ${POD:namespace_name} + hawkular_server: + url: https://hawkular-metrics.openshift-infra.svc.cluster.local + credentials: + username: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.username + password: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.password + ca_cert_file: secret:openshift-infra/hawkular-metrics-certs/ca.crt + emitter: + status_enabled: false + collector: + minimum_collection_interval: 10s + default_collection_interval: 30s + metric_id_prefix: pod/${POD:uid}/custom/ + tags: + metric_name: ${METRIC:name} + description: ${METRIC:description} + units: ${METRIC:units} + namespace_id: ${POD:namespace_uid} + namespace_name: ${POD:namespace_name} + node_name: ${POD:node_name} + pod_id: ${POD:uid} + pod_ip: ${POD:ip} + pod_name: ${POD:name} + pod_namespace: ${POD:namespace_name} + hostname: ${POD:hostname} + host_ip: ${POD:host_ip} + labels: ${POD:labels} + type: pod + collector: hawkular_openshift_agent + custom_metric: true + hawkular-openshift-agent: | + endpoints: + - type: prometheus + protocol: "http" + port: 8080 + path: /metrics + collection_interval: 30s + metrics: + - name: hawkular_openshift_agent_metric_data_points_collected_total + - name: hawkular_openshift_agent_monitored_endpoints + - name: hawkular_openshift_agent_monitored_pods + - name: hawkular_openshift_agent_monitored_metrics diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 new file mode 100644 index 000000000..d65eaf9ae --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 @@ -0,0 +1,91 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: hawkular-openshift-agent + labels: + name: hawkular-openshift-agent + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +spec: + selector: + matchLabels: + name: hawkular-openshift-agent + template: + metadata: + labels: + name: hawkular-openshift-agent + metrics-infra: agent + spec: + serviceAccount: hawkular-openshift-agent +{% if node_selector is iterable and node_selector | length > 0 %} + nodeSelector: +{% for key, value in node_selector.iteritems() %} + {{key}}: "{{value}}" +{% endfor %} +{% endif %} + containers: + - image: {{openshift_metrics_image_prefix}}metrics-hawkular-openshift-agent:{{openshift_metrics_image_version}} + imagePullPolicy: Always + name: hawkular-openshift-agent +{% if ((openshift_metrics_hawkular_agent_limits_cpu is defined and openshift_metrics_hawkular_agent_limits_cpu is not none) + or (openshift_metrics_hawkular_agent_limits_memory is defined and openshift_metrics_hawkular_agent_limits_memory is not none) + or (openshift_metrics_hawkular_agent_requests_cpu is defined and openshift_metrics_hawkular_agent_requests_cpu is not none) + or (openshift_metrics_hawkular_agent_requests_memory is defined and openshift_metrics_hawkular_agent_requests_memory is not none)) +%} + resources: +{% if (openshift_metrics_hawkular_agent_limits_cpu is not none + or openshift_metrics_hawkular_agent_limits_memory is not none) +%} + limits: +{% if openshift_metrics_hawkular_agent_limits_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_limits_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_limits_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_limits_memory}}" +{% endif %} +{% endif %} +{% if (openshift_metrics_hawkular_agent_requests_cpu is not none + or openshift_metrics_hawkular_agent_requests_memory is not none) +%} + requests: +{% if openshift_metrics_hawkular_agent_requests_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_requests_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_requests_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_requests_memory}}" +{% endif %} +{% endif %} +{% endif %} + + livenessProbe: + httpGet: + scheme: HTTP + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 30 + command: + - "hawkular-openshift-agent" + - "-config" + - "/hawkular-openshift-agent-configuration/config.yaml" + - "-v" + - "3" + env: + - name: K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: hawkular-openshift-agent-configuration + mountPath: "/hawkular-openshift-agent-configuration" + volumes: + - name: hawkular-openshift-agent-configuration + configMap: + name: hawkular-openshift-agent-configuration + - name: hawkular-openshift-agent + configMap: + name: hawkular-openshift-agent-configuration diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 new file mode 100644 index 000000000..24b8cd801 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ClusterRole +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent +rules: +- apiGroups: + - "" + resources: + - configmaps + - namespaces + - nodes + - pods + - projects + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 new file mode 100644 index 000000000..ec604d73c --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 023b1a9b7..8f8550e2d 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -4,7 +4,8 @@ path: /run/ostree-booted register: ostree_booted -- block: +- when: not ostree_booted.stat.exists + block: - name: Ensure libselinux-python is installed package: name=libselinux-python state=present @@ -24,41 +25,40 @@ - openshift_additional_repos | length == 0 notify: refresh cache - # Note: OpenShift repositories under CentOS may be shipped through the - # "centos-release-openshift-origin" package which configures the repository. - # This task matches the file names provided by the package so that they are - # not installed twice in different files and remains idempotent. - - name: Configure origin gpg keys if needed - copy: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - with_items: - - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS - dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS - - src: origin/repos/openshift-ansible-centos-paas-sig.repo - dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo - notify: refresh cache - when: - - ansible_os_family == "RedHat" - - ansible_distribution != "Fedora" - - openshift_deployment_type == 'origin' - - openshift_enable_origin_repo | default(true) | bool - # Singleton block - - when: r_osr_first_run | default(true) + - when: r_openshift_repos_has_run is not defined block: + + # Note: OpenShift repositories under CentOS may be shipped through the + # "centos-release-openshift-origin" package which configures the repository. + # This task matches the file names provided by the package so that they are + # not installed twice in different files and remains idempotent. + - name: Configure origin gpg keys if needed + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS + dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS + - src: origin/repos/openshift-ansible-centos-paas-sig.repo + dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo + notify: refresh cache + when: + - ansible_os_family == "RedHat" + - ansible_distribution != "Fedora" + - openshift_deployment_type == 'origin' + - openshift_enable_origin_repo | default(true) | bool + - name: Ensure clean repo cache in the event repos have been changed manually debug: msg: "First run of openshift_repos" changed_when: true notify: refresh cache - - name: Set fact r_osr_first_run false + - name: Record that openshift_repos already ran set_fact: - r_osr_first_run: false + r_openshift_repos_has_run: True # Force running ALL handlers now, because we expect repo cache to be cleared # if changes have been made. - meta: flush_handlers - - when: not ostree_booted.stat.exists |