diff options
author | Diego Castro <spinolacastro@gmail.com> | 2015-10-22 16:22:43 -0300 |
---|---|---|
committer | Diego Castro <spinolacastro@gmail.com> | 2015-10-22 16:22:43 -0300 |
commit | f559eb3146c65a6ec58f0fed3eb2d1124c1314c7 (patch) | |
tree | 24975a7a87568784b88252bc70d14f7b302670b0 /roles/os_zabbix | |
parent | 8468d25fae71c80277c10ad975641cb1ba230fd8 (diff) | |
parent | e6d426fddd79c08452195cd32286bb600f62d51d (diff) | |
download | openshift-f559eb3146c65a6ec58f0fed3eb2d1124c1314c7.tar.gz openshift-f559eb3146c65a6ec58f0fed3eb2d1124c1314c7.tar.bz2 openshift-f559eb3146c65a6ec58f0fed3eb2d1124c1314c7.tar.xz openshift-f559eb3146c65a6ec58f0fed3eb2d1124c1314c7.zip |
fix merge conflicts
Diffstat (limited to 'roles/os_zabbix')
24 files changed, 1055 insertions, 1286 deletions
diff --git a/roles/os_zabbix/README.md b/roles/os_zabbix/README.md new file mode 100644 index 000000000..ac3dc2833 --- /dev/null +++ b/roles/os_zabbix/README.md @@ -0,0 +1,40 @@ +os_zabbix +========= + +Automate zabbix tasks. + +Requirements +------------ + +This requires the openshift_tools rpm be installed for the zbxapi.py library. It can be found here: https://github.com/openshift/openshift-tools under openshift_tools/monitoring/zbxapi.py for now. + +Role Variables +-------------- + +zab_server +zab_username +zab_password + +Dependencies +------------ + +This depeonds on the zbxapi.py library located here: https://github.com/openshift/openshift-tools under openshift_tools/monitoring/zbxapi.py for now. + +Example Playbook +---------------- + + - zbx_host: + server: zab_server + user: zab_user + password: zab_password + name: 'myhost' + +License +------- + +ASL 2.0 + +Author Information +------------------ + +OpenShift operations, Red Hat, Inc diff --git a/roles/os_zabbix/defaults/main.yml b/roles/os_zabbix/defaults/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/os_zabbix/defaults/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/os_zabbix/handlers/main.yml b/roles/os_zabbix/handlers/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/os_zabbix/handlers/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/os_zabbix/library/__init__.py b/roles/os_zabbix/library/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/roles/os_zabbix/library/__init__.py +++ /dev/null diff --git a/roles/os_zabbix/library/test.yml b/roles/os_zabbix/library/test.yml deleted file mode 100644 index f585bcbb2..000000000 --- a/roles/os_zabbix/library/test.yml +++ /dev/null @@ -1,92 +0,0 @@ ---- -# This is a test playbook to create one of each of the zabbix ansible modules. -# ensure that the zbxapi module is installed -# ansible-playbook test.yml -- name: Test zabbix ansible module - hosts: localhost - gather_facts: no - vars: - zbx_server: http://localhost/zabbix/api_jsonrpc.php - zbx_user: Admin - zbx_password: zabbix - - pre_tasks: - - name: Create a template - zbx_template: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - name: 'test template' - register: template_output - - - debug: var=template_output - - - name: Create an item - zbx_item: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - name: 'test item' - key: 'kenny.item.1' - template_name: "{{ template_output.results[0].host }}" - register: item_output - - - debug: var=item_output - - - name: Create an trigger - zbx_trigger: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - expression: '{test template:kenny.item.1.last()}>2' - desc: 'Kenny desc' - register: trigger_output - - - debug: var=trigger_output - - - name: Create a hostgroup - zbx_hostgroup: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - name: 'kenny hostgroup' - register: hostgroup_output - - - debug: var=hostgroup_output - - - name: Create a host - zbx_host: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - name: 'kenny host' - hostgroups: - - 'kenny hostgroup' - register: host_output - - - debug: var=host_output - - - name: Create a usergroup - zbx_usergroup: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - name: kenny usergroup - rights: - - 'kenny hostgroup': rw - register: usergroup_output - - - debug: var=usergroup_output - - - name: Create a user - zbx_user: - server: "{{ zbx_server }}" - user: "{{ zbx_user }}" - password: "{{ zbx_password }}" - alias: kenny user - passwd: zabbix - usergroups: - - kenny usergroup - register: user_output - - - debug: var=user_output diff --git a/roles/os_zabbix/library/zbx_host.py b/roles/os_zabbix/library/zbx_host.py deleted file mode 100644 index d75dfdea1..000000000 --- a/roles/os_zabbix/library/zbx_host.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python -''' -Zabbix host ansible module -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def get_group_ids(zapi, hostgroup_names): - ''' - get hostgroups - ''' - # Fetch groups by name - group_ids = [] - for hgr in hostgroup_names: - content = zapi.get_content('hostgroup', 'get', {'search': {'name': hgr}}) - if content.has_key('result'): - group_ids.append({'groupid': content['result'][0]['groupid']}) - - return group_ids - -def get_template_ids(zapi, template_names): - ''' - get related templates - ''' - template_ids = [] - # Fetch templates by name - for template_name in template_names: - content = zapi.get_content('template', 'get', {'search': {'host': template_name}}) - if content.has_key('result'): - template_ids.append({'templateid': content['results'][0]['templateid']}) - return template_ids - -def main(): - ''' - Ansible module for zabbix host - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - name=dict(default=None, type='str'), - hostgroup_names=dict(default=[], type='list'), - template_names=dict(default=[], type='list'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - interfaces=dict(default=[], type='list'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'host' - idname = "hostid" - hname = module.params['name'] - state = module.params['state'] - - # selectInterfaces doesn't appear to be working but is needed. - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'host': hname}, - 'selectGroups': 'groupid', - 'selectParentTemplates': 'templateid', - 'selectInterfaces': 'interfaceid', - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'host': hname, - 'groups': get_group_ids(zapi, module.params('hostgroup_names')), - 'templates': get_template_ids(zapi, module.params('template_names')), - 'interfaces': module.params.get('interfaces', [{'type': 1, # interface type, 1 = agent - 'main': 1, # default interface? 1 = true - 'useip': 1, # default interface? 1 = true - 'ip': '127.0.0.1', # default interface? 1 = true - 'dns': '', # dns for host - 'port': '10050', # port for interface? 10050 - }]) - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - - if key == 'templates' and zab_results.has_key('parentTemplates'): - if zab_results['parentTemplates'] != value: - differences[key] = value - - elif zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_hostgroup.py b/roles/os_zabbix/library/zbx_hostgroup.py deleted file mode 100644 index a1eb875d4..000000000 --- a/roles/os_zabbix/library/zbx_hostgroup.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -''' Ansible module for hostgroup -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix hostgroup ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def main(): - ''' ansible module for hostgroup - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - name=dict(default=None, type='str'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'hostgroup' - idname = "groupid" - hname = module.params['name'] - state = module.params['state'] - - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'name': hname}, - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'name': hname} - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - if zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_item.py b/roles/os_zabbix/library/zbx_item.py deleted file mode 100644 index 57ec06463..000000000 --- a/roles/os_zabbix/library/zbx_item.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -''' - Ansible module for zabbix items -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix item ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def get_value_type(value_type): - ''' - Possible values: - 0 - numeric float; - 1 - character; - 2 - log; - 3 - numeric unsigned; - 4 - text - ''' - vtype = 0 - if 'int' in value_type: - vtype = 3 - elif 'char' in value_type: - vtype = 1 - elif 'str' in value_type: - vtype = 4 - - return vtype - -def main(): - ''' - ansible zabbix module for zbx_item - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - name=dict(default=None, type='str'), - key=dict(default=None, type='str'), - template_name=dict(default=None, type='str'), - zabbix_type=dict(default=2, type='int'), - value_type=dict(default='int', type='str'), - applications=dict(default=[], type='list'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'item' - idname = "itemid" - state = module.params['state'] - key = module.params['key'] - template_name = module.params['template_name'] - - content = zapi.get_content('template', 'get', {'search': {'host': template_name}}) - templateid = None - if content['result']: - templateid = content['result'][0]['templateid'] - else: - module.exit_json(changed=False, - results='Error: Could find template with name %s for item.' % template_name, - state="Unkown") - - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'key_': key}, - 'selectApplications': 'applicationid', - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'name': module.params.get('name', module.params['key']), - 'key_': key, - 'hostid': templateid, - 'type': module.params['zabbix_type'], - 'value_type': get_value_type(module.params['value_type']), - 'applications': module.params['applications'], - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - - if zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_mediatype.py b/roles/os_zabbix/library/zbx_mediatype.py deleted file mode 100644 index a49aecd0f..000000000 --- a/roles/os_zabbix/library/zbx_mediatype.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -''' - Ansible module for mediatype -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix mediatype ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True -def get_mtype(mtype): - ''' - Transport used by the media type. - Possible values: - 0 - email; - 1 - script; - 2 - SMS; - 3 - Jabber; - 100 - Ez Texting. - ''' - mtype = mtype.lower() - media_type = None - if mtype == 'script': - media_type = 1 - elif mtype == 'sms': - media_type = 2 - elif mtype == 'jabber': - media_type = 3 - elif mtype == 'script': - media_type = 100 - else: - media_type = 0 - - return media_type - -def main(): - ''' - Ansible zabbix module for mediatype - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - description=dict(default=None, type='str'), - mtype=dict(default=None, type='str'), - smtp_server=dict(default=None, type='str'), - smtp_helo=dict(default=None, type='str'), - smtp_email=dict(default=None, type='str'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'mediatype' - idname = "mediatypeid" - description = module.params['description'] - state = module.params['state'] - - content = zapi.get_content(zbx_class_name, 'get', {'search': {'description': description}}) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'description': description, - 'type': get_mtype(module.params['media_type']), - 'smtp_server': module.params['smtp_server'], - 'smtp_helo': module.params['smtp_helo'], - 'smtp_email': module.params['smtp_email'], - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - if zab_results[key] != value and \ - zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_template.py b/roles/os_zabbix/library/zbx_template.py deleted file mode 100644 index 676fa7e49..000000000 --- a/roles/os_zabbix/library/zbx_template.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python -''' -Ansible module for template -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix template ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def main(): - ''' Ansible module for template - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - name=dict(default=None, type='str'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zbc = ZabbixConnection(module.params['server'], user, passwd, module.params['debug']) - zapi = ZabbixAPI(zbc) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'template' - idname = 'templateid' - tname = module.params['name'] - state = module.params['state'] - # get a template, see if it exists - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'host': tname}, - 'selectParentTemplates': 'templateid', - 'selectGroups': 'groupid', - #'selectApplications': extend, - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'groups': module.params.get('groups', [{'groupid': '1'}]), - 'host': tname, - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - if key == 'templates' and zab_results.has_key('parentTemplates'): - if zab_results['parentTemplates'] != value: - differences[key] = value - elif zab_results[key] != str(value) and zab_results[key] != value: - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=content['result'], state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_trigger.py b/roles/os_zabbix/library/zbx_trigger.py deleted file mode 100644 index 7cc9356c8..000000000 --- a/roles/os_zabbix/library/zbx_trigger.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python -''' -ansible module for zabbix triggers -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix trigger ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - - -def get_priority(priority): - ''' determine priority - ''' - prior = 0 - if 'info' in priority: - prior = 1 - elif 'warn' in priority: - prior = 2 - elif 'avg' == priority or 'ave' in priority: - prior = 3 - elif 'high' in priority: - prior = 4 - elif 'dis' in priority: - prior = 5 - - return prior - -def get_deps(zapi, deps): - ''' get trigger dependencies - ''' - results = [] - for desc in deps: - content = zapi.get_content('trigger', - 'get', - {'search': {'description': desc}, - 'expandExpression': True, - 'selectDependencies': 'triggerid', - }) - if content.has_key('result'): - results.append({'triggerid': content['result'][0]['triggerid']}) - - return results - -def main(): - ''' - Create a trigger in zabbix - - Example: - "params": { - "description": "Processor load is too high on {HOST.NAME}", - "expression": "{Linux server:system.cpu.load[percpu,avg1].last()}>5", - "dependencies": [ - { - "triggerid": "14062" - } - ] - }, - - ''' - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - expression=dict(default=None, type='str'), - description=dict(default=None, type='str'), - dependencies=dict(default=[], type='list'), - priority=dict(default='avg', type='str'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - #Set the instance and the template for the rest of the calls - zbx_class_name = 'trigger' - idname = "triggerid" - state = module.params['state'] - description = module.params['description'] - - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'description': description}, - 'expandExpression': True, - 'selectDependencies': 'triggerid', - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'description': description, - 'expression': module.params['expression'], - 'dependencies': get_deps(zapi, module.params['dependencies']), - 'priority': get_priority(module.params['priority']), - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - - if zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_user.py b/roles/os_zabbix/library/zbx_user.py deleted file mode 100644 index 489023407..000000000 --- a/roles/os_zabbix/library/zbx_user.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python -''' -ansible module for zabbix users -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix user ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def get_usergroups(zapi, usergroups): - ''' Get usergroups - ''' - ugroups = [] - for ugr in usergroups: - content = zapi.get_content('usergroup', - 'get', - {'search': {'name': ugr}, - #'selectUsers': 'userid', - #'getRights': 'extend' - }) - if content['result']: - ugroups.append({'usrgrpid': content['result'][0]['usrgrpid']}) - - return ugroups - -def main(): - ''' - ansible zabbix module for users - ''' - - ##def user(self, name, state='present', params=None): - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - alias=dict(default=None, type='str'), - passwd=dict(default=None, type='str'), - usergroups=dict(default=None, type='list'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - password = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zbc = ZabbixConnection(module.params['server'], user, password, module.params['debug']) - zapi = ZabbixAPI(zbc) - - ## before we can create a user media and users with media types we need media - zbx_class_name = 'user' - idname = "userid" - alias = module.params['alias'] - state = module.params['state'] - - content = zapi.get_content(zbx_class_name, - 'get', - {'output': 'extend', - 'search': {'alias': alias}, - "selectUsrgrps": 'usergrpid', - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'alias': alias, - 'passwd': module.params['passwd'], - 'usrgrps': get_usergroups(zapi, module.params['usergroups']), - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - if key == 'passwd': - differences[key] = value - - elif zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/library/zbx_usergroup.py b/roles/os_zabbix/library/zbx_usergroup.py deleted file mode 100644 index ede4c9df1..000000000 --- a/roles/os_zabbix/library/zbx_usergroup.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -''' -zabbix ansible module for usergroups -''' -# vim: expandtab:tabstop=4:shiftwidth=4 -# -# Zabbix usergroup ansible module -# -# -# Copyright 2015 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This is in place because each module looks similar to each other. -# These need duplicate code as their behavior is very similar -# but different for each zabbix class. -# pylint: disable=duplicate-code - -# pylint: disable=import-error -from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection - -def exists(content, key='result'): - ''' Check if key exists in content or the size of content[key] > 0 - ''' - if not content.has_key(key): - return False - - if not content[key]: - return False - - return True - -def get_rights(zapi, rights): - '''Get rights - ''' - perms = [] - for right in rights: - hstgrp = right.keys()[0] - perm = right.values()[0] - content = zapi.get_content('hostgroup', 'get', {'search': {'name': hstgrp}}) - if content['result']: - permission = 0 - if perm == 'ro': - permission = 2 - elif perm == 'rw': - permission = 3 - perms.append({'id': content['result'][0]['groupid'], - 'permission': permission}) - return perms - -def get_userids(zapi, users): - ''' Get userids from user aliases - ''' - userids = [] - for alias in users: - content = zapi.get_content('user', 'get', {'search': {'alias': alias}}) - if content['result']: - userids.append(content['result'][0]['userid']) - - return userids - -def main(): - ''' Ansible module for usergroup - ''' - - ##def usergroup(self, name, rights=None, users=None, state='present', params=None): - - module = AnsibleModule( - argument_spec=dict( - server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), - user=dict(default=None, type='str'), - password=dict(default=None, type='str'), - name=dict(default=None, type='str'), - rights=dict(default=[], type='list'), - users=dict(default=[], type='list'), - debug=dict(default=False, type='bool'), - state=dict(default='present', type='str'), - ), - #supports_check_mode=True - ) - - user = module.params.get('user', os.environ['ZABBIX_USER']) - passwd = module.params.get('password', os.environ['ZABBIX_PASSWORD']) - - zapi = ZabbixAPI(ZabbixConnection(module.params['server'], user, passwd, module.params['debug'])) - - zbx_class_name = 'usergroup' - idname = "usrgrpid" - uname = module.params['name'] - state = module.params['state'] - - content = zapi.get_content(zbx_class_name, - 'get', - {'search': {'name': uname}, - 'selectUsers': 'userid', - }) - if state == 'list': - module.exit_json(changed=False, results=content['result'], state="list") - - if state == 'absent': - if not exists(content): - module.exit_json(changed=False, state="absent") - - content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) - module.exit_json(changed=True, results=content['result'], state="absent") - - if state == 'present': - params = {'name': uname, - 'rights': get_rights(zapi, module.params['rights']), - 'userids': get_userids(zapi, module.params['users']), - } - - if not exists(content): - # if we didn't find it, create it - content = zapi.get_content(zbx_class_name, 'create', params) - module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties - differences = {} - zab_results = content['result'][0] - for key, value in params.items(): - if key == 'rights': - differences['rights'] = value - - elif key == 'userids' and zab_results.has_key('users'): - if zab_results['users'] != value: - differences['userids'] = value - - elif zab_results[key] != value and zab_results[key] != str(value): - differences[key] = value - - if not differences: - module.exit_json(changed=False, results=zab_results, state="present") - - # We have differences and need to update - differences[idname] = zab_results[idname] - content = zapi.get_content(zbx_class_name, 'update', differences) - module.exit_json(changed=True, results=content['result'], state="present") - - module.exit_json(failed=True, - changed=False, - results='Unknown state passed. %s' % state, - state="unknown") - -# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled -# import module snippets. This are required -from ansible.module_utils.basic import * - -main() diff --git a/roles/os_zabbix/meta/main.yml b/roles/os_zabbix/meta/main.yml new file mode 100644 index 000000000..360f5aad2 --- /dev/null +++ b/roles/os_zabbix/meta/main.yml @@ -0,0 +1,9 @@ +--- +galaxy_info: + author: OpenShift + description: ZabbixAPI + company: Red Hat, Inc + license: ASL 2.0 + min_ansible_version: 1.2 +dependencies: +- lib_zabbix diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml new file mode 100644 index 000000000..a503b24d7 --- /dev/null +++ b/roles/os_zabbix/tasks/main.yml @@ -0,0 +1,81 @@ +--- +- name: Main List all templates + zbx_template: + zbx_server: "{{ ozb_server }}" + zbx_user: "{{ ozb_user }}" + zbx_password: "{{ ozb_password }}" + state: list + register: templates + +- include_vars: template_heartbeat.yml +- include_vars: template_os_linux.yml +- include_vars: template_docker.yml +- include_vars: template_openshift_master.yml +- include_vars: template_openshift_node.yml +- include_vars: template_ops_tools.yml +- include_vars: template_app_zabbix_server.yml +- include_vars: template_app_zabbix_agent.yml + +- name: Include Template Heartbeat + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_heartbeat }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template os_linux + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_os_linux }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template docker + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_docker }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template Openshift Master + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_openshift_master }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template Openshift Node + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_openshift_node }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template Ops Tools + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_ops_tools }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Server + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_server }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Agent + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_agent }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/main.yml b/roles/os_zabbix/vars/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/os_zabbix/vars/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml new file mode 100644 index 000000000..6349b6384 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -0,0 +1,23 @@ +--- +g_template_app_zabbix_agent: + name: Template App Zabbix Agent + zitems: + - key: agent.hostname + applications: + - Zabbix agent + value_type: character + zabbix_type: 0 + + - key: agent.ping + applications: + - Zabbix agent + description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. + value_type: int + zabbix_type: 0 + + ztriggers: + - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' + description: Zabbix agent is unreachable for 15 minutes. + expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1' + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..aeec16254 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,412 @@ +--- +g_template_app_zabbix_server: + name: Template App Zabbix Server + zitems: + - key: housekeeper_creates + applications: + - Zabbix server + description: A simple count of the number of partition creates output by the housekeeper script. + units: '' + value_type: int + zabbix_type: 5 + + - key: housekeeper_drops + applications: + - Zabbix server + description: A simple count of the number of partition drops output by the housekeeper script. + units: '' + value_type: int + zabbix_type: 5 + + - key: housekeeper_errors + applications: + - Zabbix server + description: A simple count of the number of errors output by the housekeeper script. + units: '' + value_type: int + zabbix_type: 5 + + - key: housekeeper_total + applications: + - Zabbix server + description: A simple count of the total number of lines output by the housekeeper + script. + units: '' + value_type: int + zabbix_type: 5 + + - key: zabbix[process,alerter,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,configuration syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,db watchdog,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,discoverer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,escalator,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,history syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,housekeeper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,http poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,icmp pinger,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,ipmi poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,java poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,node watcher,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,proxy poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,self-monitoring,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,snmp trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,timer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[process,unreachable poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: 5 + + - key: zabbix[queue,10m] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: 5 + interval: 600 + + - key: zabbix[queue] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: 5 + interval: 600 + + - key: zabbix[rcache,buffer,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: 5 + + - key: zabbix[wcache,history,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: 5 + + - key: zabbix[wcache,text,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: 5 + + - key: zabbix[wcache,trend,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: 5 + + - key: zabbix[wcache,values] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: 5 + delta: 1 # speed per second + + ztriggers: + - description: "There has been unexpected output while running the housekeeping script\ + \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ + \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ + \ for more details." + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' + name: Unexpected output in Zabbix DB Housekeeping + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + + - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' + name: Errors during Zabbix DB Housekeeping + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' + name: Zabbix alerter processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' + name: Zabbix configuration syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' + name: Zabbix db watchdog processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' + name: Zabbix discoverer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' + name: Zabbix escalator processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' + name: Zabbix history syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' + name: Zabbix housekeeper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' + name: Zabbix http poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' + name: Zabbix icmp pinger processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' + name: Zabbix ipmi poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' + name: Zabbix java poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' + name: Zabbix node watcher processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' + name: Zabbix poller processes more than 75% busy + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' + name: Zabbix proxy poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' + name: Zabbix self-monitoring processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' + name: Zabbix snmp trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: Timer processes usually are busy because they have to process time + based trigger functions + expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' + name: Zabbix timer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' + name: Zabbix trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' + name: Zabbix unreachable poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: "This alert generally indicates a performance problem or a problem\ + \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ + \ is Administration > Queue. Be sure to check the general view and the per-proxy\ + \ view." + expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' + name: More than 1000 items having missing data for more than 10 minutes + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + + - description: Consider increasing CacheSize in the zabbix_server.conf configuration + file + expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' + name: Less than 5% free in the configuration cache + priority: info + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' + name: Less than 25% free in the history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' + name: Less than 25% free in the text history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' + name: Less than 25% free in the trends cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml new file mode 100644 index 000000000..395e054de --- /dev/null +++ b/roles/os_zabbix/vars/template_docker.yml @@ -0,0 +1,89 @@ +--- +g_template_docker: + name: Template Docker + zitems: + - key: docker.ping + applications: + - Docker Daemon + value_type: int + + - key: docker.storage.is_loopback + applications: + - Docker Storage + value_type: int + + - key: docker.storage.data.space.total + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.used + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.data.space.percent_available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.total + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.used + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.available + applications: + - Docker Storage + value_type: float + + - key: docker.storage.metadata.space.percent_available + applications: + - Docker Storage + value_type: float + ztriggers: + - name: 'docker.ping failed on {HOST.NAME}' + expression: '{Template Docker:docker.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' + priority: high + + - name: 'Docker storage is using LOOPBACK on {HOST.NAME}' + expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' + priority: high + + - name: 'Critically low docker storage data space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' + priority: high + + - name: 'Critically low docker storage metadata space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' + priority: high + + # Put triggers that depend on other triggers here (deps must be created first) + - name: 'Low docker storage data space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' + dependencies: + - 'Critically low docker storage data space on {HOST.NAME}' + priority: average + + - name: 'Low docker storage metadata space on {HOST.NAME}' + expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' + dependencies: + - 'Critically low docker storage metadata space on {HOST.NAME}' + priority: average + diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml new file mode 100644 index 000000000..8dbe0d0d6 --- /dev/null +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -0,0 +1,13 @@ +--- +g_template_heartbeat: + name: Template Heartbeat + zitems: + - name: Heartbeat Ping + applications: + - Heartbeat + key: heartbeat.ping + ztriggers: + - name: 'Heartbeat.ping has failed on {HOST.NAME}' + expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' + priority: avg + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml new file mode 100644 index 000000000..1de4fefbb --- /dev/null +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -0,0 +1,58 @@ +--- +g_template_openshift_master: + name: Template Openshift Master + zitems: + - name: create_app + applications: + - Openshift Master + key: create_app + + - key: openshift.master.process.count + description: Shows number of master processes running + type: int + applications: + - Openshift Master + + - key: openshift.master.user.count + description: Shows number of users in a cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pod.running.count + description: Shows number of pods running + type: int + applications: + - Openshift Master + + - key: openshift.project.counter + description: Shows number of projects on a cluster + type: int + applications: + - Openshift Master + + ztriggers: + - name: 'Application creation has failed on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' + priority: avg + + - name: 'Openshift Master process not running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Too many Openshift Master processes running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Number of users for Openshift Master on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info + + - name: 'There are no projects running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.project.counter.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml new file mode 100644 index 000000000..ce28b1048 --- /dev/null +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -0,0 +1,44 @@ +--- +g_template_openshift_node: + name: Template Openshift Node + zitems: + - key: openshift.node.process.count + description: Shows number of OpenShift Node processes running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.pids.count + description: Shows number of ovs process ids running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.ports.count + description: Shows number of OVS ports defined + type: int + applications: + - Openshift Node + + ztriggers: + - name: 'Openshift Node process not running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'Too many Openshift Node processes running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'OVS may not be running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'Number of OVS ports is 0 on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + diff --git a/roles/os_zabbix/vars/template_ops_tools.yml b/roles/os_zabbix/vars/template_ops_tools.yml new file mode 100644 index 000000000..d1b8a2514 --- /dev/null +++ b/roles/os_zabbix/vars/template_ops_tools.yml @@ -0,0 +1,23 @@ +--- +g_template_ops_tools: + name: Template Operations Tools + zdiscoveryrules: + - name: disc.ops.runner + key: disc.ops.runner + lifetime: 1 + description: "Dynamically register operations runner items" + + zitemprototypes: + - discoveryrule_key: disc.ops.runner + name: "Exit code of ops-runner[{#OSO_COMMAND}]" + key: "disc.ops.runner.command.exitcode[{#OSO_COMMAND}]" + value_type: int + description: "The exit code of the command run from ops-runner" + applications: + - Ops Runner + + ztriggerprototypes: + - name: 'ops-runner[{#OSO_COMMAND}]: non-zero exit code on {HOST.NAME}' + expression: '{Template Operations Tools:disc.ops.runner.command.exitcode[{#OSO_COMMAND}].last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_ops_runner_command.asciidoc' + priority: average diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml new file mode 100644 index 000000000..3ae1500bc --- /dev/null +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -0,0 +1,260 @@ +--- +g_template_os_linux: + name: Template OS Linux + zitems: + - key: kernel.uname.sysname + applications: + - Kernel + value_type: string + + - key: kernel.all.cpu.wait.total + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.cpu.irq.hard + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.cpu.idle + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.uname.distro + applications: + - Kernel + value_type: string + + - key: kernel.uname.nodename + applications: + - Kernel + value_type: string + + - key: kernel.all.cpu.irq.soft + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.load.15_minute + applications: + - Kernel + value_type: float + + - key: kernel.all.cpu.sys + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.load.5_minute + applications: + - Kernel + value_type: float + + - key: kernel.all.cpu.nice + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.load.1_minute + applications: + - Kernel + value_type: float + + - key: kernel.uname.version + applications: + - Kernel + value_type: string + + - key: kernel.all.uptime + applications: + - Kernel + value_type: int + + - key: kernel.all.cpu.user + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.uname.machine + applications: + - Kernel + value_type: string + + - key: hinv.ncpu + applications: + - Kernel + value_type: int + + - key: kernel.all.cpu.steal + applications: + - Kernel + value_type: float + units: '%' + + - key: kernel.all.pswitch + applications: + - Kernel + value_type: int + + - key: kernel.uname.release + applications: + - Kernel + value_type: string + + - key: proc.nprocs + applications: + - Kernel + value_type: int + + # Memory Items + - key: mem.freemem + applications: + - Memory + value_type: int + description: "PCP: free system memory metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: mem.util.bufmem + applications: + - Memory + value_type: int + description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: swap.used + applications: + - Memory + value_type: int + description: "PCP: swap used metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: swap.length + applications: + - Memory + value_type: int + description: "PCP: total swap available metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: mem.physmem + applications: + - Memory + value_type: int + description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM." + multiplier: 1024 + units: B + + - key: swap.free + applications: + - Memory + value_type: int + description: "PCP: swap free metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: mem.util.available + applications: + - Memory + value_type: int + description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo" + multiplier: 1024 + units: B + + - key: mem.util.used + applications: + - Memory + value_type: int + description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo" + multiplier: 1024 + units: B + + - key: mem.util.cached + applications: + - Memory + value_type: int + description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo" + multiplier: 1024 + units: B + + zdiscoveryrules: + - name: disc.filesys + key: disc.filesys + lifetime: 1 + description: "Dynamically register the filesystems" + + zitemprototypes: + - discoveryrule_key: disc.filesys + name: "disc.filesys.full.{#OSO_FILESYS}" + key: "disc.filesys.full[{#OSO_FILESYS}]" + value_type: float + description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full" + applications: + - Disk + + - discoveryrule_key: disc.filesys + name: "Percentage of used inodes on {#OSO_FILESYS}" + key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]" + value_type: float + description: "PCP derived value of percentage of used inodes on a filesystem." + applications: + - Disk + + ztriggerprototypes: + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: high + + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: warn + + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' + priority: high + + ztriggers: + - name: 'Too many TOTAL processes on {HOST.NAME}' + expression: '{Template OS Linux:proc.nprocs.last()}>5000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' + priority: warn + + - name: 'Lack of available memory on {HOST.NAME}' + expression: '{Template OS Linux:mem.freemem.last()}<30720000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' + priority: warn + description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' + + # CPU Utilization # + - name: 'CPU idle less than 5% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: average + description: 'CPU is less than 5% idle' + + - name: 'CPU idle less than 10% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: warn + description: 'CPU is less than 10% idle' + dependencies: + - 'CPU idle less than 5% on {HOST.NAME}' |