diff options
35 files changed, 1177 insertions, 95 deletions
diff --git a/README_AWS.md b/README_AWS.md index c511741b9..3a5790eb3 100644 --- a/README_AWS.md +++ b/README_AWS.md @@ -154,18 +154,10 @@ Note: If no deployment type is specified, then the default is origin. ## Post-ansible steps -Create the default router -------------------------- -On the master host: -```sh -oadm router --create=true \ - --credentials=/etc/openshift/master/openshift-router.kubeconfig -``` - -Create the default docker-registry ----------------------------------- -On the master host: -```sh -oadm registry --create=true \ - --credentials=/etc/openshift/master/openshift-registry.kubeconfig -```
\ No newline at end of file + +You should now be ready to follow the **What's Next?** section of the advanced installation guide to deploy your router, registry, and other components. + +Refer to the advanced installation guide for your deployment type: + +* [OpenShift Enterprise](https://docs.openshift.com/enterprise/3.0/install_config/install/advanced_install.html#what-s-next) +* [OpenShift Origin](https://docs.openshift.org/latest/install_config/install/advanced_install.html#what-s-next) diff --git a/README_OSE.md b/README_OSE.md index cce1ec030..79ad07044 100644 --- a/README_OSE.md +++ b/README_OSE.md @@ -101,6 +101,7 @@ ose3-master.example.com # host group for nodes [nodes] +ose3-master.example.com ose3-node[1:2].example.com ``` @@ -116,22 +117,8 @@ ansible-playbook playbooks/byo/config.yml inventory file use the -i option for ansible-playbook. ## Post-ansible steps -#### Create the default router -On the master host: -```sh -oadm router --create=true \ - --credentials=/etc/openshift/master/openshift-router.kubeconfig \ - --images='rcm-img-docker01.build.eng.bos.redhat.com:5001/openshift3/ose-${component}:${version}' -``` -#### Create the default docker-registry -On the master host: -```sh -oadm registry --create=true \ - --credentials=/etc/openshift/master/openshift-registry.kubeconfig \ - --images='rcm-img-docker01.build.eng.bos.redhat.com:5001/openshift3/ose-${component}:${version}' \ - --mount-host=/var/lib/openshift/docker-registry -``` +You should now be ready to follow the [What's Next?](https://docs.openshift.com/enterprise/3.0/install_config/install/advanced_install.html#what-s-next) section of the advanced installation guide to deploy your router, registry, and other components. 
## Overriding detected ip addresses and hostnames Some deployments will require that the user override the detected hostnames diff --git a/README_libvirt.md b/README_libvirt.md index 1a710ff3b..3f8bbb5f0 100644 --- a/README_libvirt.md +++ b/README_libvirt.md @@ -94,7 +94,8 @@ dns=dnsmasq - Configure dnsmasq to use the Virtual Network router for example.com: ```sh -sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf server=/example.com/192.168.55.1 +sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf +server=/example.com/192.168.55.1 ``` Test The Setup diff --git a/README_origin.md b/README_origin.md index f13fe660a..cb213a93a 100644 --- a/README_origin.md +++ b/README_origin.md @@ -73,6 +73,7 @@ osv3-master.example.com # host group for nodes [nodes] +osv3-master.example.com osv3-node[1:2].example.com ``` @@ -88,23 +89,8 @@ ansible-playbook playbooks/byo/config.yml inventory file use the -i option for ansible-playbook. ## Post-ansible steps -#### Create the default router -On the master host: -```sh -oadm router --create=true \ - --credentials=/etc/openshift/master/openshift-router.kubeconfig -``` - -#### Create the default docker-registry -On the master host: -```sh -oadm registry --create=true \ - --credentials=/etc/openshift/master/openshift-registry.kubeconfig -``` -If you would like persistent storage, refer to the -[OpenShift documentation](https://docs.openshift.org/latest/admin_guide/install/docker_registry.html) -for more information on deployment options for the built in docker-registry. +You should now be ready to follow the [What's Next?](https://docs.openshift.org/latest/install_config/install/advanced_install.html#what-s-next) section of the advanced installation guide to deploy your router, registry, and other components. 
## Overriding detected ip addresses and hostnames Some deployments will require that the user override the detected hostnames diff --git a/bin/cluster b/bin/cluster index a19434e21..582327415 100755 --- a/bin/cluster +++ b/bin/cluster @@ -260,7 +260,7 @@ if __name__ == '__main__': choices=['origin', 'online', 'enterprise'], help='Deployment type. (default: origin)') meta_parser.add_argument('-T', '--product-type', - choices=['openshift' 'atomic-enterprise'], + choices=['openshift', 'atomic-enterprise'], help='Product type. (default: openshift)') meta_parser.add_argument('-o', '--option', action='append', help='options') diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml index c9ae923bb..b6a2d2f26 100644 --- a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml +++ b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml @@ -27,9 +27,8 @@ gather_facts: no vars: - cli_volume_type: io1 + cli_volume_type: gp2 cli_volume_size: 30 - cli_volume_iops: "{{ 30 * cli_volume_size }}" pre_tasks: - fail: @@ -104,7 +103,6 @@ volume_size: "{{ cli_volume_size | default(30, True)}}" volume_type: "{{ cli_volume_type }}" device_name: /dev/xvdb - iops: "{{ 30 * cli_volume_size }}" register: vol - debug: var=vol @@ -142,10 +140,3 @@ - debug: var=dockerstart - - name: Wait for docker to stabilize - pause: - seconds: 30 - - # leaving off the '-t' for docker exec. 
With it, it doesn't work with ansible and tty support - - name: update zabbix docker items - command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py diff --git a/playbooks/adhoc/grow_docker_vg/filter_plugins/oo_filters.py b/playbooks/adhoc/grow_docker_vg/filter_plugins/oo_filters.py new file mode 100644 index 000000000..d0264cde9 --- /dev/null +++ b/playbooks/adhoc/grow_docker_vg/filter_plugins/oo_filters.py @@ -0,0 +1,41 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +Custom filters for use in openshift-ansible +''' + +import pdb + + +class FilterModule(object): + ''' Custom ansible filters ''' + + @staticmethod + def oo_pdb(arg): + ''' This pops you into a pdb instance where arg is the data passed in + from the filter. + Ex: "{{ hostvars | oo_pdb }}" + ''' + pdb.set_trace() + return arg + + @staticmethod + def translate_volume_name(volumes, target_volume): + ''' + This filter matches a device string /dev/sdX to /dev/xvdX + It will then return the AWS volume ID + ''' + for vol in volumes: + translated_name = vol["attachment_set"]["device"].replace("/dev/sd", "/dev/xvd") + if target_volume.startswith(translated_name): + return vol["id"] + + return None + + + def filters(self): + ''' returns a mapping of filters to methods ''' + return { + "translate_volume_name": self.translate_volume_name, + } diff --git a/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml new file mode 100644 index 000000000..ef9b45abd --- /dev/null +++ b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml @@ -0,0 +1,206 @@ +--- +# This playbook grows the docker VG on a node by: +# * add a new volume +# * add volume to the existing VG. +# * pv move to the new volume. +# * remove old volume +# * detach volume +# * mark old volume in AWS with "REMOVE ME" tag +# * grow docker LVM to 90% of the VG +# +# To run: +# 1. 
Source your AWS credentials (make sure it's the corresponding AWS account) into your environment +# export AWS_ACCESS_KEY_ID='XXXXX' +# export AWS_SECRET_ACCESS_KEY='XXXXXX' +# +# 2. run the playbook: +# ansible-playbook -e 'cli_tag_name=<tag-name>' grow_docker_vg.yml +# +# Example: +# ansible-playbook -e 'cli_tag_name=ops-compute-12345' grow_docker_vg.yml +# +# Notes: +# * By default this will do a 55GB GP2 volume. This can be overridden with the "-e 'cli_volume_size=100'" variable +# * This does a GP2 by default. Support for Provisioned IOPS has not been added +# * This will assign the new volume to /dev/xvdc. This is not variablized, yet. +# * This can be done with NO downtime on the host +# * This playbook assumes that there is a Logical Volume that is installed and called "docker-pool". This is +# the LV that gets created via the "docker-storage-setup" command +# + +- name: Grow the docker volume group + hosts: "tag_Name_{{ cli_tag_name }}" + user: root + connection: ssh + gather_facts: no + + vars: + cli_volume_type: gp2 + cli_volume_size: 55 +# cli_volume_iops: "{{ 30 * cli_volume_size }}" + + pre_tasks: + - fail: + msg: "This playbook requires {{item}} to be set." + when: "{{ item }} is not defined or {{ item }} == ''" + with_items: + - cli_tag_name + - cli_volume_size + + - debug: + var: hosts + + - name: start docker + service: + name: docker + state: started + + - name: Determine if Storage Driver (docker info) is devicemapper + shell: docker info | grep 'Storage Driver:.*devicemapper' + register: device_mapper_check + ignore_errors: yes + + - debug: + var: device_mapper_check + + - name: fail if we don't detect devicemapper + fail: + msg: The "Storage Driver" in "docker info" is not set to "devicemapper"! Please investigate manually. + when: device_mapper_check.rc == 1 + + # docker-storage-setup creates a docker-pool as the lvm. I am using docker-pool lvm to test + # and find the volume group. 
+ - name: Attempt to find the Volume Group that docker is using + shell: lvs | grep docker-pool | awk '{print $2}' + register: docker_vg_name + ignore_errors: yes + + - debug: + var: docker_vg_name + + - name: fail if we don't find a docker volume group + fail: + msg: Unable to find docker volume group. Please investigate manually. + when: docker_vg_name.stdout_lines|length != 1 + + # docker-storage-setup creates a docker-pool as the lvm. I am using docker-pool lvm to test + # and find the physical volume. + - name: Attempt to find the Phyisical Volume that docker is using + shell: "pvs | grep {{ docker_vg_name.stdout }} | awk '{print $1}'" + register: docker_pv_name + ignore_errors: yes + + - debug: + var: docker_pv_name + + - name: fail if we don't find a docker physical volume + fail: + msg: Unable to find docker physical volume. Please investigate manually. + when: docker_pv_name.stdout_lines|length != 1 + + + - name: get list of volumes from AWS + delegate_to: localhost + ec2_vol: + state: list + instance: "{{ ec2_id }}" + region: "{{ ec2_region }}" + register: attached_volumes + + - debug: var=attached_volumes + + - name: get volume id of current docker volume + set_fact: + old_docker_volume_id: "{{ attached_volumes.volumes | translate_volume_name(docker_pv_name.stdout) }}" + + - debug: var=old_docker_volume_id + + - name: check to see if /dev/xvdc exists + command: test -e /dev/xvdc + register: xvdc_check + ignore_errors: yes + + - debug: var=xvdc_check + + - name: fail if /dev/xvdc already exists + fail: + msg: /dev/xvdc already exists. 
Please investigate + when: xvdc_check.rc == 0 + + - name: Create a volume and attach it + delegate_to: localhost + ec2_vol: + state: present + instance: "{{ ec2_id }}" + region: "{{ ec2_region }}" + volume_size: "{{ cli_volume_size | default(30, True)}}" + volume_type: "{{ cli_volume_type }}" + device_name: /dev/xvdc + register: create_volume + + - debug: var=create_volume + + - name: Fail when problems creating volumes and attaching + fail: + msg: "Failed to create or attach volume msg: {{ create_volume.msg }}" + when: create_volume.msg is defined + + - name: tag the vol with a name + delegate_to: localhost + ec2_tag: region={{ ec2_region }} resource={{ create_volume.volume_id }} + args: + tags: + Name: "{{ ec2_tag_Name }}" + env: "{{ ec2_tag_environment }}" + register: voltags + + - name: check for attached drive + command: test -b /dev/xvdc + register: attachment_check + until: attachment_check.rc == 0 + retries: 30 + delay: 2 + + - name: partition the new drive and make it lvm + command: parted /dev/xvdc --script -- mklabel msdos mkpart primary 0% 100% set 1 lvm + + - name: pvcreate /dev/xvdc + command: pvcreate /dev/xvdc1 + + - name: Extend the docker volume group + command: vgextend "{{ docker_vg_name.stdout }}" /dev/xvdc1 + + - name: pvmove onto new volume + command: "pvmove {{ docker_pv_name.stdout }} /dev/xvdc1" + async: 3600 + poll: 10 + + - name: Remove the old docker drive from the volume group + command: "vgreduce {{ docker_vg_name.stdout }} {{ docker_pv_name.stdout }}" + + - name: Remove the pv from the old drive + command: "pvremove {{ docker_pv_name.stdout }}" + + - name: Extend the docker lvm + command: "lvextend -l '90%VG' /dev/{{ docker_vg_name.stdout }}/docker-pool" + + - name: detach old docker volume + delegate_to: localhost + ec2_vol: + region: "{{ ec2_region }}" + id: "{{ old_docker_volume_id }}" + instance: None + + - name: tag the old vol valid label + delegate_to: localhost + ec2_tag: region={{ ec2_region }} 
resource={{old_docker_volume_id}} + args: + tags: + Name: "{{ ec2_tag_Name }} REMOVE ME" + register: voltags + + - name: Update the /etc/sysconfig/docker-storage-setup with new device + lineinfile: + dest: /etc/sysconfig/docker-storage-setup + regexp: ^DEVS= + line: DEVS=/dev/xvdc diff --git a/playbooks/adhoc/upgrades/README.md b/playbooks/adhoc/upgrades/README.md new file mode 100644 index 000000000..6de8a970f --- /dev/null +++ b/playbooks/adhoc/upgrades/README.md @@ -0,0 +1,21 @@ +# [NOTE] +This playbook will re-run installation steps overwriting any local +modifications. You should ensure that your inventory has been updated with any +modifications you've made after your initial installation. If you find any items +that cannot be configured via ansible please open an issue at +https://github.com/openshift/openshift-ansible + +# Overview +This playbook is available as a technical preview. It currently performs the +following steps. + + * Upgrade and restart master services + * Upgrade and restart node services + * Applies latest configuration by re-running the installation playbook + * Applies the latest cluster policies + * Updates the default router if one exists + * Updates the default registry if one exists + * Updates image streams and quickstarts + +# Usage +ansible-playbook -i ~/ansible-inventory openshift-ansible/playbooks/adhoc/upgrades/upgrade.yml diff --git a/playbooks/adhoc/upgrades/filter_plugins b/playbooks/adhoc/upgrades/filter_plugins new file mode 120000 index 000000000..b0b7a3414 --- /dev/null +++ b/playbooks/adhoc/upgrades/filter_plugins @@ -0,0 +1 @@ +../../../filter_plugins/
\ No newline at end of file diff --git a/playbooks/adhoc/upgrades/lookup_plugins b/playbooks/adhoc/upgrades/lookup_plugins new file mode 120000 index 000000000..73cafffe5 --- /dev/null +++ b/playbooks/adhoc/upgrades/lookup_plugins @@ -0,0 +1 @@ +../../../lookup_plugins/
\ No newline at end of file diff --git a/playbooks/adhoc/upgrades/roles b/playbooks/adhoc/upgrades/roles new file mode 120000 index 000000000..e2b799b9d --- /dev/null +++ b/playbooks/adhoc/upgrades/roles @@ -0,0 +1 @@ +../../../roles/
\ No newline at end of file diff --git a/playbooks/adhoc/upgrades/upgrade.yml b/playbooks/adhoc/upgrades/upgrade.yml new file mode 100644 index 000000000..e666f0472 --- /dev/null +++ b/playbooks/adhoc/upgrades/upgrade.yml @@ -0,0 +1,115 @@ +--- +- name: Re-Run cluster configuration to apply latest configuration changes + include: ../../common/openshift-cluster/config.yml + vars: + g_etcd_group: "{{ 'etcd' }}" + g_masters_group: "{{ 'masters' }}" + g_nodes_group: "{{ 'nodes' }}" + openshift_cluster_id: "{{ cluster_id | default('default') }}" + openshift_deployment_type: "{{ deployment_type }}" + +- name: Upgrade masters + hosts: masters + vars: + openshift_version: "{{ openshift_pkg_version | default('') }}" + tasks: + - name: Upgrade master packages + yum: pkg={{ openshift.common.service_type }}-master{{ openshift_version }} state=latest + - name: Restart master services + service: name="{{ openshift.common.service_type}}-master" state=restarted + +- name: Upgrade nodes + hosts: nodes + vars: + openshift_version: "{{ openshift_pkg_version | default('') }}" + tasks: + - name: Upgrade node packages + yum: pkg={{ openshift.common.service_type }}-node{{ openshift_version }} state=latest + - name: Restart node services + service: name="{{ openshift.common.service_type }}-node" state=restarted + +- name: Determine new master version + hosts: oo_first_master + tasks: + - name: Determine new version + command: > + rpm -q --queryformat '%{version}' {{ openshift.common.service_type }}-master + register: _new_version + +- name: Ensure AOS 3.0.2 or Origin 1.0.6 + hosts: oo_first_master + tasks: + fail: This playbook requires Origin 1.0.6 or Atomic OpenShift 3.0.2 or later + when: _new_version.stdout < 1.0.6 or (_new_version.stdout >= 3.0 and _new_version.stdout < 3.0.2) + +- name: Update cluster policy + hosts: oo_first_master + tasks: + - name: oadm policy reconcile-cluster-roles --confirm + command: > + {{ openshift.common.admin_binary}} --config={{ 
openshift.common.config_base }}/master/admin.kubeconfig + policy reconcile-cluster-roles --confirm + +- name: Upgrade default router + hosts: oo_first_master + vars: + - router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + _new_version.stdout ) }}" + - oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig" + tasks: + - name: Check for default router + command: > + {{ oc_cmd }} get -n default dc/router + register: _default_router + failed_when: false + changed_when: false + - name: Check for allowHostNetwork and allowHostPorts + when: _default_router.rc == 0 + shell: > + {{ oc_cmd }} get -o yaml scc/privileged | /usr/bin/grep -e allowHostPorts -e allowHostNetwork + register: _scc + - name: Grant allowHostNetwork and allowHostPorts + when: + - _default_router.rc == 0 + - "'false' in _scc.stdout" + command: > + {{ oc_cmd }} patch scc/privileged -p '{"allowHostPorts":true,"allowHostNetwork":true}' --loglevel=9 + - name: Update deployment config to 1.0.4/3.0.1 spec + when: _default_router.rc == 0 + command: > + {{ oc_cmd }} patch dc/router -p + '{"spec":{"strategy":{"rollingParams":{"updatePercent":-10},"spec":{"serviceAccount":"router","serviceAccountName":"router"}}}}' + - name: Switch to hostNetwork=true + when: _default_router.rc == 0 + command: > + {{ oc_cmd }} patch dc/router -p '{"spec":{"template":{"spec":{"hostNetwork":true}}}}' + - name: Update router image to current version + when: _default_router.rc == 0 + command: > + {{ oc_cmd }} patch dc/router -p + '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}"}]}}}}' + +- name: Upgrade default + hosts: oo_first_master + vars: + - registry_image: "{{ openshift.master.registry_url | replace( '${component}', 'docker-registry' ) | replace ( '${version}', 'v' + _new_version.stdout ) }}" + - oc_cmd: "{{ openshift.common.client_binary }} --config={{ 
openshift.common.config_base }}/master/admin.kubeconfig" + tasks: + - name: Check for default registry + command: > + {{ oc_cmd }} get -n default dc/docker-registry + register: _default_registry + failed_when: false + changed_when: false + - name: Update registry image to current version + when: _default_registry.rc == 0 + command: > + {{ oc_cmd }} patch dc/docker-registry -p + '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}' + +- name: Update image streams and templates + hosts: oo_first_master + vars: + openshift_examples_import_command: "update" + openshift_deployment_type: "{{ deployment_type }}" + roles: + - openshift_examples diff --git a/playbooks/aws/openshift-cluster/tasks/launch_instances.yml b/playbooks/aws/openshift-cluster/tasks/launch_instances.yml index b77bcdc1a..9c699120b 100644 --- a/playbooks/aws/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/aws/openshift-cluster/tasks/launch_instances.yml @@ -172,6 +172,7 @@ - rotate 7 - compress - sharedscripts + - missingok scripts: postrotate: "/bin/kill -HUP `cat /var/run/syslogd.pid 2> /dev/null` 2> /dev/null || true" diff --git a/playbooks/libvirt/openshift-cluster/templates/network.xml b/playbooks/libvirt/openshift-cluster/templates/network.xml index 86dcd62bb..050bc7ab9 100644 --- a/playbooks/libvirt/openshift-cluster/templates/network.xml +++ b/playbooks/libvirt/openshift-cluster/templates/network.xml @@ -8,7 +8,7 @@ <!-- TODO: query for first available virbr interface available --> <bridge name='virbr3' stp='on' delay='0'/> <!-- TODO: make overridable --> - <domain name='example.com'/> + <domain name='example.com' localOnly='yes' /> <dns> <!-- TODO: automatically add host entries --> </dns> diff --git a/roles/ansible_tower/tasks/main.yaml b/roles/ansible_tower/tasks/main.yaml index c110a3b70..b7757214d 100644 --- a/roles/ansible_tower/tasks/main.yaml +++ b/roles/ansible_tower/tasks/main.yaml @@ -9,6 +9,7 @@ - ansible - telnet - ack + - 
pylint - name: download Tower setup get_url: url=http://releases.ansible.com/ansible-tower/setup/ansible-tower-setup-2.1.1.tar.gz dest=/opt/ force=no @@ -38,5 +39,3 @@ regexp: "^({{ item.option }})( *)=" line: '\1\2= {{ item.value }}' with_items: config_changes | default([], true) - - diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 27bfb7de9..656901409 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -38,6 +38,7 @@ template: src: etcd.conf.j2 dest: /etc/etcd/etcd.conf + backup: true notify: - restart etcd diff --git a/roles/etcd_ca/tasks/main.yml b/roles/etcd_ca/tasks/main.yml index 8a266f732..625756867 100644 --- a/roles/etcd_ca/tasks/main.yml +++ b/roles/etcd_ca/tasks/main.yml @@ -18,6 +18,7 @@ - template: dest: "{{ etcd_ca_dir }}/fragments/openssl_append.cnf" src: openssl_append.j2 + backup: true - assemble: src: "{{ etcd_ca_dir }}/fragments" diff --git a/roles/fluentd_master/tasks/main.yml b/roles/fluentd_master/tasks/main.yml index 69f8eceab..55cd94460 100644 --- a/roles/fluentd_master/tasks/main.yml +++ b/roles/fluentd_master/tasks/main.yml @@ -52,4 +52,3 @@ name: 'td-agent' state: started enabled: yes - diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 4ec1b8e02..e7fd6fa21 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -128,12 +128,12 @@ def get_status(status): return _status -def get_app_ids(zapi, application_names): +def get_app_ids(zapi, application_names, templateid): ''' get application ids from names ''' app_ids = [] for app_name in application_names: - content = zapi.get_content('application', 'get', {'search': {'name': app_name}}) + content = zapi.get_content('application', 'get', {'filter': {'name': app_name}, 'templateids': templateid}) if content.has_key('result'): app_ids.append(content['result'][0]['applicationid']) return app_ids @@ -212,7 +212,7 @@ def main(): 
'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']), 'type': get_type(module.params['ztype']), 'value_type': get_value_type(module.params['value_type']), - 'applications': get_app_ids(zapi, module.params['applications']), + 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']), 'description': module.params['description'], } diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index a05de7e68..ab7731faa 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -74,6 +74,36 @@ def get_deps(zapi, deps): return results + +def get_trigger_status(inc_status): + ''' Determine the trigger's status + 0 is enabled + 1 is disabled + ''' + r_status = 0 + if inc_status == 'disabled': + r_status = 1 + + return r_status + +def get_template_id(zapi, template_name): + ''' + get related templates + ''' + template_ids = [] + app_ids = {} + # Fetch templates by name + content = zapi.get_content('template', + 'get', + {'search': {'host': template_name}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + template_ids.append(content['result'][0]['templateid']) + for app in content['result'][0]['applications']: + app_ids[app['name']] = app['applicationid'] + + return template_ids, app_ids + def main(): ''' Create a trigger in zabbix @@ -103,7 +133,9 @@ def main(): dependencies=dict(default=[], type='list'), priority=dict(default='avg', type='str'), url=dict(default=None, type='str'), + status=dict(default=None, type='str'), state=dict(default='present', type='str'), + template_name=dict(default=None, type='str'), ), #supports_check_mode=True ) @@ -119,11 +151,16 @@ def main(): state = module.params['state'] tname = module.params['name'] + templateid = None + if module.params['template_name']: + templateid, _ = get_template_id(zapi, module.params['template_name']) + content = 
zapi.get_content(zbx_class_name, 'get', {'filter': {'description': tname}, 'expandExpression': True, 'selectDependencies': 'triggerid', + 'templateids': templateid, }) # Get @@ -145,6 +182,7 @@ def main(): 'dependencies': get_deps(zapi, module.params['dependencies']), 'priority': get_priority(module.params['priority']), 'url': module.params['url'], + 'status': get_trigger_status(module.params['status']), } # Remove any None valued params @@ -156,6 +194,10 @@ def main(): if not exists(content): # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') ######## diff --git a/roles/lib_zabbix/library/zbx_triggerprototype.py b/roles/lib_zabbix/library/zbx_triggerprototype.py new file mode 100644 index 000000000..c1224b268 --- /dev/null +++ b/roles/lib_zabbix/library/zbx_triggerprototype.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python +''' +ansible module for zabbix triggerprototypes +''' +# vim: expandtab:tabstop=4:shiftwidth=4 +# +# Zabbix triggerprototypes ansible module +# +# +# Copyright 2015 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is in place because each module looks similar to each other. +# These need duplicate code as their behavior is very similar +# but different for each zabbix class. 
+# pylint: disable=duplicate-code + +# pylint: disable=import-error +from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection + +def exists(content, key='result'): + ''' Check if key exists in content or the size of content[key] > 0 + ''' + if not content.has_key(key): + return False + + if not content[key]: + return False + + return True + +def get_priority(priority): + ''' determine priority + ''' + prior = 0 + if 'info' in priority: + prior = 1 + elif 'warn' in priority: + prior = 2 + elif 'avg' == priority or 'ave' in priority: + prior = 3 + elif 'high' in priority: + prior = 4 + elif 'dis' in priority: + prior = 5 + + return prior + +def get_trigger_status(inc_status): + ''' Determine the trigger's status + 0 is enabled + 1 is disabled + ''' + r_status = 0 + if inc_status == 'disabled': + r_status = 1 + + return r_status + + +def main(): + ''' + Create a triggerprototype in zabbix + ''' + + module = AnsibleModule( + argument_spec=dict( + zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), + zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'), + zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), + zbx_debug=dict(default=False, type='bool'), + name=dict(default=None, type='str'), + expression=dict(default=None, type='str'), + description=dict(default=None, type='str'), + priority=dict(default='avg', type='str'), + url=dict(default=None, type='str'), + status=dict(default=None, type='str'), + state=dict(default='present', type='str'), + ), + #supports_check_mode=True + ) + + zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'], + module.params['zbx_user'], + module.params['zbx_password'], + module.params['zbx_debug'])) + + #Set the instance and the template for the rest of the calls + zbx_class_name = 'triggerprototype' + idname = "triggerid" + state = module.params['state'] + tname = module.params['name'] + + content = zapi.get_content(zbx_class_name, + 'get', 
+ {'filter': {'description': tname}, + 'expandExpression': True, + 'selectDependencies': 'triggerid', + }) + + # Get + if state == 'list': + module.exit_json(changed=False, results=content['result'], state="list") + + # Delete + if state == 'absent': + if not exists(content): + module.exit_json(changed=False, state="absent") + content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) + module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update + if state == 'present': + params = {'description': tname, + 'comments': module.params['description'], + 'expression': module.params['expression'], + 'priority': get_priority(module.params['priority']), + 'url': module.params['url'], + 'status': get_trigger_status(module.params['status']), + } + + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# + if not exists(content): + # if we didn't find it, create it + content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state='present') + + ######## + # UPDATE + ######## + differences = {} + zab_results = content['result'][0] + for key, value in params.items(): + + if zab_results[key] != value and zab_results[key] != str(value): + differences[key] = value + + if not differences: + module.exit_json(changed=False, results=zab_results, state="present") + + # We have differences and need to update + differences[idname] = zab_results[idname] + content = zapi.get_content(zbx_class_name, 'update', differences) + module.exit_json(changed=True, results=content['result'], state="present") + + + module.exit_json(failed=True, + changed=False, + results='Unknown state passed. 
%s' % state, + state="unknown") + +# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled +# import module snippets. This are required +from ansible.module_utils.basic import * + +main() diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index b4821bdc7..41381e76c 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -9,7 +9,8 @@ - set_fact: - lzbx_applications: "{{ template.zitems | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" + lzbx_item_applications: "{{ template.zitems | default([], True) | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" + lzbx_itemprototype_applications: "{{ template.zitemprototypes | default([], True) | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" - name: Create Application zbx_application: @@ -18,9 +19,11 @@ zbx_password: "{{ password }}" name: "{{ item }}" template_name: "{{ template.name }}" - with_items: lzbx_applications + with_items: + - "{{ lzbx_item_applications }}" + - "{{ lzbx_itemprototype_applications }}" register: created_application - when: template.zitems is defined + when: template.zitems is defined or template.zitemprototypes is defined - name: Create Items zbx_item: @@ -66,7 +69,7 @@ with_items: template.zdiscoveryrules when: template.zdiscoveryrules is defined -- name: Create Item Prototype +- name: Create Item Prototypes zbx_itemprototype: zbx_server: "{{ server }}" zbx_user: "{{ user }}" @@ -80,3 +83,16 @@ description: "{{ item.description | default('', True) }}" with_items: template.zitemprototypes when: template.zitemprototypes is defined + +- name: Create Trigger Prototypes + zbx_triggerprototype: + zbx_server: "{{ server }}" + zbx_user: "{{ user }}" + zbx_password: "{{ password }}" + name: "{{ item.name }}" + expression: "{{ item.expression }}" + url: "{{ item.url | default('', True) }}" + priority: "{{ 
item.priority | default('average', True) }}" + description: "{{ item.description | default('', True) }}" + with_items: template.ztriggerprototypes + when: template.ztriggerprototypes is defined diff --git a/roles/openshift_examples/defaults/main.yml b/roles/openshift_examples/defaults/main.yml index 3246790aa..7d4f100e3 100644 --- a/roles/openshift_examples/defaults/main.yml +++ b/roles/openshift_examples/defaults/main.yml @@ -14,3 +14,5 @@ db_templates_base: "{{ examples_base }}/db-templates" xpaas_image_streams: "{{ examples_base }}/xpaas-streams/jboss-image-streams.json" xpaas_templates_base: "{{ examples_base }}/xpaas-templates" quickstarts_base: "{{ examples_base }}/quickstart-templates" + +openshift_examples_import_command: "create" diff --git a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json index 0497e6824..5df36ccc2 100644 --- a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json +++ b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json @@ -6,10 +6,10 @@ "iconClass" : "icon-jboss", "description": "Application template for EAP 6 applications built using STI." 
}, - "name": "eap6-basic-sti" + "name": "eap6-https-sti" }, "labels": { - "template": "eap6-basic-sti" + "template": "eap6-https-sti" }, "parameters": [ { diff --git a/roles/openshift_examples/tasks/main.yml b/roles/openshift_examples/tasks/main.yml index bfc6dfb0a..3a829a4c6 100644 --- a/roles/openshift_examples/tasks/main.yml +++ b/roles/openshift_examples/tasks/main.yml @@ -7,7 +7,7 @@ # RHEL and Centos image streams are mutually exclusive - name: Import RHEL streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ rhel_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ rhel_image_streams }} when: openshift_examples_load_rhel register: oex_import_rhel_streams failed_when: "'already exists' not in oex_import_rhel_streams.stderr and oex_import_rhel_streams.rc != 0" @@ -15,7 +15,7 @@ - name: Import Centos Image streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ centos_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ centos_image_streams }} when: openshift_examples_load_centos | bool register: oex_import_centos_streams failed_when: "'already exists' not in oex_import_centos_streams.stderr and oex_import_centos_streams.rc != 0" @@ -23,7 +23,7 @@ - name: Import db templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ db_templates_base }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ db_templates_base }} when: openshift_examples_load_db_templates | bool register: oex_import_db_templates failed_when: "'already exists' not in oex_import_db_templates.stderr and oex_import_db_templates.rc != 0" @@ -31,7 +31,7 @@ - name: Import quickstart-templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ quickstarts_base }} + {{ openshift.common.client_binary }} {{ 
openshift_examples_import_command }} -n openshift -f {{ quickstarts_base }} when: openshift_examples_load_quickstarts register: oex_import_quickstarts failed_when: "'already exists' not in oex_import_quickstarts.stderr and oex_import_quickstarts.rc != 0" @@ -40,7 +40,7 @@ - name: Import xPaas image streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ xpaas_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ xpaas_image_streams }} when: openshift_examples_load_xpaas | bool register: oex_import_xpaas_streams failed_when: "'already exists' not in oex_import_xpaas_streams.stderr and oex_import_xpaas_streams.rc != 0" @@ -48,7 +48,7 @@ - name: Import xPaas templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ xpaas_templates_base }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ xpaas_templates_base }} when: openshift_examples_load_xpaas | bool register: oex_import_xpaas_templates failed_when: "'already exists' not in oex_import_xpaas_templates.stderr and oex_import_xpaas_templates.rc != 0" diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 991b8da66..69bb49c9b 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -296,9 +296,8 @@ def set_fluentd_facts_if_unset(facts): """ if 'common' in facts: - deployment_type = facts['common']['deployment_type'] if 'use_fluentd' not in facts['common']: - use_fluentd = True if deployment_type == 'online' else False + use_fluentd = False facts['common']['use_fluentd'] = use_fluentd return facts @@ -475,16 +474,17 @@ def set_deployment_facts_if_unset(facts): if deployment_type in ['enterprise', 'online']: data_dir = '/var/lib/openshift' facts['common']['data_dir'] = data_dir + facts['common']['version'] = get_openshift_version() for 
role in ('master', 'node'): if role in facts: deployment_type = facts['common']['deployment_type'] if 'registry_url' not in facts[role]: - registry_url = 'aos3/aos-${component}:${version}' - if deployment_type in ['enterprise', 'online']: + registry_url = 'openshift/origin-${component}:${version}' + if deployment_type in ['enterprise', 'online', 'openshift-enterprise']: registry_url = 'openshift3/ose-${component}:${version}' - elif deployment_type == 'origin': - registry_url = 'openshift/origin-${component}:${version}' + elif deployment_type == 'atomic-enterprise': + registry_url = 'aep3/aep-${component}:${version}' facts[role]['registry_url'] = registry_url return facts @@ -599,6 +599,21 @@ def get_current_config(facts): return current_config +def get_openshift_version(): + """ Get current version of openshift on the host + + Returns: + version: the current openshift version + """ + version = '' + + if os.path.isfile('/usr/bin/openshift'): + _, output, _ = module.run_command(['/usr/bin/openshift', 'version']) + versions = dict(e.split(' v') for e in output.splitlines()) + version = versions.get('openshift', '') + + #TODO: acknowledge the possibility of a containerized install + return version def apply_provider_facts(facts, provider_facts): """ Apply provider facts to supplied facts dict @@ -644,7 +659,7 @@ def merge_facts(orig, new): facts = dict() for key, value in orig.iteritems(): if key in new: - if isinstance(value, dict): + if isinstance(value, dict) and isinstance(new[key], dict): facts[key] = merge_facts(value, new[key]) else: facts[key] = copy.copy(new[key]) diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index b57711b58..fa12005ab 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -100,6 +100,7 @@ template: dest: "{{ openshift_master_scheduler_conf }}" src: scheduler.json.j2 + backup: true notify: - restart master @@ -129,6 +130,7 @@ template: dest: "{{ 
openshift_master_config_file }}" src: master.yaml.v1.j2 + backup: true notify: - restart master diff --git a/roles/openshift_master/templates/scheduler.json.j2 b/roles/openshift_master/templates/scheduler.json.j2 index 835f2383e..cb5f43bb2 100644 --- a/roles/openshift_master/templates/scheduler.json.j2 +++ b/roles/openshift_master/templates/scheduler.json.j2 @@ -1,4 +1,6 @@ { + "kind": "Policy", + "apiVersion": "v1", "predicates": [ {"name": "MatchNodeSelector"}, {"name": "PodFitsResources"}, diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 1986b631e..e8cc499c0 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -47,6 +47,7 @@ template: dest: "{{ openshift_node_config_file }}" src: node.yaml.v1.j2 + backup: true notify: - restart node diff --git a/roles/openshift_serviceaccounts/tasks/main.yml b/roles/openshift_serviceaccounts/tasks/main.yml index 9665d0a72..d93a25a21 100644 --- a/roles/openshift_serviceaccounts/tasks/main.yml +++ b/roles/openshift_serviceaccounts/tasks/main.yml @@ -23,4 +23,4 @@ with_items: accounts - name: Apply new scc rules for service accounts - command: "{{ openshift.common.client_binary }} replace -f /tmp/scc.yaml" + command: "{{ openshift.common.client_binary }} update -f /tmp/scc.yaml" diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index 8347e9a61..e083296c0 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -12,6 +12,8 @@ - include_vars: template_docker.yml - include_vars: template_openshift_master.yml - include_vars: template_openshift_node.yml +- include_vars: template_ops_tools.yml +- include_vars: template_app_zabbix_server.yml - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -52,3 +54,19 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + +- name: Include Template Ops Tools + include: 
../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_ops_tools }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Server + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_server }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..dace2aa29 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,408 @@ +--- +g_template_app_zabbix_server: + name: Template App Zabbix Server + zitems: + - key: housekeeper_creates + applications: + - Zabbix server + description: A simple count of the number of partition creates output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_drops + applications: + - Zabbix server + description: A simple count of the number of partition drops output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_errors + applications: + - Zabbix server + description: A simple count of the number of errors output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_total + applications: + - Zabbix server + description: A simple count of the total number of lines output by the housekeeper + script. 
+ units: '' + value_type: int + zabbix_type: '2' + + - key: zabbix[process,alerter,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,configuration syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,db watchdog,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,discoverer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,escalator,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,history syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,housekeeper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,http poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,icmp pinger,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,ipmi poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,java poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,node watcher,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + 
value_type: float + zabbix_type: '5' + + - key: zabbix[process,proxy poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,self-monitoring,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,snmp trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,timer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,unreachable poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[queue,10m] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[queue] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[rcache,buffer,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,history,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,text,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,trend,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,values] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + ztriggers: + - description: "There has been unexpected output while running the 
housekeeping script\ + \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ + \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ + \ for more details." + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' + name: Unexpected output in Zabbix DB Housekeeping + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + + - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' + name: Errors during Zabbix DB Housekeeping + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' + name: Zabbix alerter processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' + name: Zabbix configuration syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' + name: Zabbix db watchdog processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' + name: Zabbix 
discoverer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' + name: Zabbix escalator processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' + name: Zabbix history syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' + name: Zabbix housekeeper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' + name: Zabbix http poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' + name: Zabbix icmp pinger processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' + name: Zabbix ipmi poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' + name: Zabbix java poller processes more than 75% busy + 
priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' + name: Zabbix node watcher processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' + name: Zabbix poller processes more than 75% busy + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' + name: Zabbix proxy poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' + name: Zabbix self-monitoring processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' + name: Zabbix snmp trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: Timer processes usually are busy because they have to process time + based trigger functions + expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' + name: Zabbix timer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' + name: Zabbix trapper 
processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' + name: Zabbix unreachable poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: "This alert generally indicates a performance problem or a problem\ + \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ + \ is Administration > Queue. Be sure to check the general view and the per-proxy\ + \ view." + expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' + name: More than 1000 items having missing data for more than 10 minutes + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + + - description: Consider increasing CacheSize in the zabbix_server.conf configuration + file + expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' + name: Less than 5% free in the configuration cache + priority: info + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' + name: Less than 25% free in the history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' + name: Less than 25% free in the text history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' + name: Less than 25% free in the trends cache + priority: avg + url: 
https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc diff --git a/roles/os_zabbix/vars/template_ops_tools.yml b/roles/os_zabbix/vars/template_ops_tools.yml new file mode 100644 index 000000000..d1b8a2514 --- /dev/null +++ b/roles/os_zabbix/vars/template_ops_tools.yml @@ -0,0 +1,23 @@ +--- +g_template_ops_tools: + name: Template Operations Tools + zdiscoveryrules: + - name: disc.ops.runner + key: disc.ops.runner + lifetime: 1 + description: "Dynamically register operations runner items" + + zitemprototypes: + - discoveryrule_key: disc.ops.runner + name: "Exit code of ops-runner[{#OSO_COMMAND}]" + key: "disc.ops.runner.command.exitcode[{#OSO_COMMAND}]" + value_type: int + description: "The exit code of the command run from ops-runner" + applications: + - Ops Runner + + ztriggerprototypes: + - name: 'ops-runner[{#OSO_COMMAND}]: non-zero exit code on {HOST.NAME}' + expression: '{Template Operations Tools:disc.ops.runner.command.exitcode[{#OSO_COMMAND}].last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_ops_runner_command.asciidoc' + priority: average diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 84a7740b0..70c3809bd 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -10,17 +10,20 @@ g_template_os_linux: - key: kernel.all.cpu.wait.total applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.cpu.irq.hard applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.cpu.idle applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.uname.distro applications: @@ -35,7 +38,8 @@ g_template_os_linux: - key: kernel.all.cpu.irq.soft applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.15_minute applications: @@ -45,7 +49,8 @@ g_template_os_linux: - key: 
kernel.all.cpu.sys applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.5_minute applications: @@ -55,7 +60,8 @@ g_template_os_linux: - key: kernel.all.cpu.nice applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.1_minute applications: @@ -75,7 +81,8 @@ g_template_os_linux: - key: kernel.all.cpu.user applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.uname.machine applications: @@ -90,7 +97,8 @@ g_template_os_linux: - key: kernel.all.cpu.steal applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.pswitch applications: @@ -196,12 +204,10 @@ g_template_os_linux: - name: disc.filesys key: disc.filesys lifetime: 1 - template_name: Template OS Linux description: "Dynamically register the filesystems" zitemprototypes: - discoveryrule_key: disc.filesys - template_name: Template OS Linux name: "disc.filesys.full.{#OSO_FILESYS}" key: "disc.filesys.full[{#OSO_FILESYS}]" value_type: float @@ -209,6 +215,17 @@ g_template_os_linux: applications: - Disk + ztriggerprototypes: + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: high + ztriggers: - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' @@ -240,3 +257,18 @@ g_template_os_linux: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn description: 'Alert on less than 
30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' + + # CPU Utilization # + - name: 'CPU idle less than 5% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: high + description: 'CPU is less than 5% idle' + + - name: 'CPU idle less than 10% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: warn + description: 'CPU is less than 10% idle' + dependencies: + - 'CPU idle less than 5% on {HOST.NAME}' |