diff options
Diffstat (limited to 'roles')
32 files changed, 1629 insertions, 85 deletions
diff --git a/roles/ansible_tower/tasks/main.yaml b/roles/ansible_tower/tasks/main.yaml index c110a3b70..b7757214d 100644 --- a/roles/ansible_tower/tasks/main.yaml +++ b/roles/ansible_tower/tasks/main.yaml @@ -9,6 +9,7 @@ - ansible - telnet - ack + - pylint - name: download Tower setup get_url: url=http://releases.ansible.com/ansible-tower/setup/ansible-tower-setup-2.1.1.tar.gz dest=/opt/ force=no @@ -38,5 +39,3 @@ regexp: "^({{ item.option }})( *)=" line: '\1\2= {{ item.value }}' with_items: config_changes | default([], true) - - diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index 27bfb7de9..656901409 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -38,6 +38,7 @@ template: src: etcd.conf.j2 dest: /etc/etcd/etcd.conf + backup: true notify: - restart etcd diff --git a/roles/etcd_ca/tasks/main.yml b/roles/etcd_ca/tasks/main.yml index 8a266f732..625756867 100644 --- a/roles/etcd_ca/tasks/main.yml +++ b/roles/etcd_ca/tasks/main.yml @@ -18,6 +18,7 @@ - template: dest: "{{ etcd_ca_dir }}/fragments/openssl_append.cnf" src: openssl_append.j2 + backup: true - assemble: src: "{{ etcd_ca_dir }}/fragments" diff --git a/roles/fluentd_master/tasks/main.yml b/roles/fluentd_master/tasks/main.yml index 69f8eceab..55cd94460 100644 --- a/roles/fluentd_master/tasks/main.yml +++ b/roles/fluentd_master/tasks/main.yml @@ -52,4 +52,3 @@ name: 'td-agent' state: started enabled: yes - diff --git a/roles/lib_zabbix/library/zbx_action.py b/roles/lib_zabbix/library/zbx_action.py new file mode 100644 index 000000000..d64cebae1 --- /dev/null +++ b/roles/lib_zabbix/library/zbx_action.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python +''' + Ansible module for zabbix actions +''' +# vim: expandtab:tabstop=4:shiftwidth=4 +# +# Zabbix action ansible module +# +# +# Copyright 2015 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is in place because each module looks similar to each other. +# These need duplicate code as their behavior is very similar +# but different for each zabbix class. +# pylint: disable=duplicate-code + +# pylint: disable=import-error +from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection, ZabbixAPIError + +def exists(content, key='result'): + ''' Check if key exists in content or the size of content[key] > 0 + ''' + if not content.has_key(key): + return False + + if not content[key]: + return False + + return True + +def conditions_equal(zab_conditions, user_conditions): + '''Compare two lists of conditions''' + c_type = 'conditiontype' + _op = 'operator' + val = 'value' + if len(user_conditions) != len(zab_conditions): + return False + + for zab_cond, user_cond in zip(zab_conditions, user_conditions): + if zab_cond[c_type] != str(user_cond[c_type]) or zab_cond[_op] != str(user_cond[_op]) or \ + zab_cond[val] != str(user_cond[val]): + return False + + return True + +def filter_differences(zabbix_filters, user_filters): + '''Determine the differences from user and zabbix for operations''' + rval = {} + for key, val in user_filters.items(): + + if key == 'conditions': + if not conditions_equal(zabbix_filters[key], val): + rval[key] = val + + elif zabbix_filters[key] != str(val): + rval[key] = val + + return rval + +# This logic is quite complex. We are comparing two lists of dictionaries. +# The outer for-loops allow us to descend down into both lists at the same time +# and then walk over the key,val pairs of the incoming user dict's changes +# or updates. The if-statements are looking at different sub-object types and +# comparing them. The other suggestion on how to write this is to write a recursive +# compare function but for the time constraints and for complexity I decided to go +# this route. +# pylint: disable=too-many-branches +def operation_differences(zabbix_ops, user_ops): + '''Determine the differences from user and zabbix for operations''' + + # if they don't match, take the user options + if len(zabbix_ops) != len(user_ops): + return user_ops + + rval = {} + for zab, user in zip(zabbix_ops, user_ops): + for key, val in user.items(): + if key == 'opconditions': + for z_cond, u_cond in zip(zab[key], user[key]): + if not all([str(u_cond[op_key]) == z_cond[op_key] for op_key in \ + ['conditiontype', 'operator', 'value']]): + rval[key] = val + break + elif key == 'opmessage': + # Verify each passed param matches + for op_msg_key, op_msg_val in val.items(): + if zab[key][op_msg_key] != str(op_msg_val): + rval[key] = val + break + + elif key == 'opmessage_grp': + zab_grp_ids = set([ugrp['usrgrpid'] for ugrp in zab[key]]) + usr_grp_ids = set([ugrp['usrgrpid'] for ugrp in val]) + if usr_grp_ids != zab_grp_ids: + rval[key] = val + + elif key == 'opmessage_usr': + zab_usr_ids = set([usr['userid'] for usr in zab[key]]) + usr_ids = set([usr['userid'] for usr in val]) + if usr_ids != zab_usr_ids: + rval[key] = val + + elif zab[key] != str(val): + rval[key] = val + return rval + +def get_users(zapi, users): + '''get the mediatype id from the mediatype name''' + rval_users = [] + + for user in users: + content = zapi.get_content('user', + 'get', + {'filter': {'alias': user}}) + rval_users.append({'userid': content['result'][0]['userid']}) + + return rval_users + +def get_user_groups(zapi, groups): + '''get the mediatype id from the mediatype name''' + user_groups = [] + + content = zapi.get_content('usergroup', + 'get', + {'search': {'name': groups}}) + + for usr_grp in content['result']: + user_groups.append({'usrgrpid': usr_grp['usrgrpid']}) + + return user_groups + +def get_mediatype_id_by_name(zapi, m_name): + '''get the mediatype id from the mediatype name''' + content = zapi.get_content('mediatype', + 'get', + {'filter': {'description': m_name}}) + + return content['result'][0]['mediatypeid'] + +def get_priority(priority): + ''' determine priority + ''' + prior = 0 + if 'info' in priority: + prior = 1 + elif 'warn' in priority: + prior = 2 + elif 'avg' == priority or 'ave' in priority: + prior = 3 + elif 'high' in priority: + prior = 4 + elif 'dis' in priority: + prior = 5 + + return prior + +def get_event_source(from_src): + '''Translate even str into value''' + choices = ['trigger', 'discovery', 'auto', 'internal'] + rval = 0 + try: + rval = choices.index(from_src) + except ValueError as _: + ZabbixAPIError('Value not found for event source [%s]' % from_src) + + return rval + +def get_status(inc_status): + '''determine status for action''' + rval = 1 + if inc_status == 'enabled': + rval = 0 + + return rval + +def get_condition_operator(inc_operator): + ''' determine the condition operator''' + vals = {'=': 0, + '<>': 1, + 'like': 2, + 'not like': 3, + 'in': 4, + '>=': 5, + '<=': 6, + 'not in': 7, + } + + return vals[inc_operator] + +def get_host_id_by_name(zapi, host_name): + '''Get host id by name''' + content = zapi.get_content('host', + 'get', + {'filter': {'name': host_name}}) + + return content['result'][0]['hostid'] + +def get_trigger_value(inc_trigger): + '''determine the proper trigger value''' + rval = 1 + if inc_trigger == 'PROBLEM': + rval = 1 + else: + rval = 0 + + return rval + +def get_template_id_by_name(zapi, t_name): + '''get the template id by name''' + content = zapi.get_content('template', + 'get', + {'filter': {'host': t_name}}) + + return content['result'][0]['templateid'] + + +def get_host_group_id_by_name(zapi, hg_name): + '''Get hostgroup id by name''' + content = zapi.get_content('hostgroup', + 'get', + {'filter': {'name': hg_name}}) + + return content['result'][0]['groupid'] + +def get_condition_type(event_source, inc_condition): + '''determine the condition type''' + c_types = {} + if event_source == 'trigger': + c_types = {'host group': 0, + 'host': 1, + 'trigger': 2, + 'trigger name': 3, + 'trigger severity': 4, + 'trigger value': 5, + 'time period': 6, + 'host template': 13, + 'application': 15, + 'maintenance status': 16, + } + + elif event_source == 'discovery': + c_types = {'host IP': 7, + 'discovered service type': 8, + 'discovered service port': 9, + 'discovery status': 10, + 'uptime or downtime duration': 11, + 'received value': 12, + 'discovery rule': 18, + 'discovery check': 19, + 'proxy': 20, + 'discovery object': 21, + } + + elif event_source == 'auto': + c_types = {'proxy': 20, + 'host name': 22, + 'host metadata': 24, + } + + elif event_source == 'internal': + c_types = {'host group': 0, + 'host': 1, + 'host template': 13, + 'application': 15, + 'event type': 23, + } + else: + raise ZabbixAPIError('Unkown event source %s' % event_source) + + return c_types[inc_condition] + +def get_operation_type(inc_operation): + ''' determine the correct operation type''' + o_types = {'send message': 0, + 'remote command': 1, + 'add host': 2, + 'remove host': 3, + 'add to host group': 4, + 'remove from host group': 5, + 'link to template': 6, + 'unlink from template': 7, + 'enable host': 8, + 'disable host': 9, + } + + return o_types[inc_operation] + +def get_action_operations(zapi, inc_operations): + '''Convert the operations into syntax for api''' + for operation in inc_operations: + operation['operationtype'] = get_operation_type(operation['operationtype']) + if operation['operationtype'] == 0: # send message. Need to fix the + operation['opmessage']['mediatypeid'] = \ + get_mediatype_id_by_name(zapi, operation['opmessage']['mediatypeid']) + operation['opmessage_grp'] = get_user_groups(zapi, operation.get('opmessage_grp', [])) + operation['opmessage_usr'] = get_users(zapi, operation.get('opmessage_usr', [])) + if operation['opmessage']['default_msg']: + operation['opmessage']['default_msg'] = 1 + else: + operation['opmessage']['default_msg'] = 0 + + # NOT supported for remote commands + elif operation['operationtype'] == 1: + continue + + # Handle Operation conditions: + # Currently there is only 1 available which + # is 'event acknowledged'. In the future + # if there are any added we will need to pass this + # option to a function and return the correct conditiontype + if operation.has_key('opconditions'): + for condition in operation['opconditions']: + if condition['conditiontype'] == 'event acknowledged': + condition['conditiontype'] = 14 + + if condition['operator'] == '=': + condition['operator'] = 0 + + if condition['value'] == 'acknowledged': + condition['operator'] = 1 + else: + condition['operator'] = 0 + + + return inc_operations + +def get_operation_evaltype(inc_type): + '''get the operation evaltype''' + rval = 0 + if inc_type == 'and/or': + rval = 0 + elif inc_type == 'and': + rval = 1 + elif inc_type == 'or': + rval = 2 + elif inc_type == 'custom': + rval = 3 + + return rval + +def get_action_conditions(zapi, event_source, inc_conditions): + '''Convert the conditions into syntax for api''' + + calc_type = inc_conditions.pop('calculation_type') + inc_conditions['evaltype'] = get_operation_evaltype(calc_type) + for cond in inc_conditions['conditions']: + + cond['operator'] = get_condition_operator(cond['operator']) + # Based on conditiontype we need to set the proper value + # e.g. conditiontype = hostgroup then the value needs to be a hostgroup id + # e.g. conditiontype = host the value needs to be a host id + cond['conditiontype'] = get_condition_type(event_source, cond['conditiontype']) + if cond['conditiontype'] == 0: + cond['value'] = get_host_group_id_by_name(zapi, cond['value']) + elif cond['conditiontype'] == 1: + cond['value'] = get_host_id_by_name(zapi, cond['value']) + elif cond['conditiontype'] == 4: + cond['value'] = get_priority(cond['value']) + + elif cond['conditiontype'] == 5: + cond['value'] = get_trigger_value(cond['value']) + elif cond['conditiontype'] == 13: + cond['value'] = get_template_id_by_name(zapi, cond['value']) + elif cond['conditiontype'] == 16: + cond['value'] = '' + + return inc_conditions + + +def get_send_recovery(send_recovery): + '''Get the integer value''' + rval = 0 + if send_recovery: + rval = 1 + + return rval + +# The branches are needed for CRUD and error handling +# pylint: disable=too-many-branches +def main(): + ''' + ansible zabbix module for zbx_item + ''' + + + module = AnsibleModule( + argument_spec=dict( + zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), + zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'), + zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), + zbx_debug=dict(default=False, type='bool'), + + name=dict(default=None, type='str'), + event_source=dict(default='trigger', choices=['trigger', 'discovery', 'auto', 'internal'], type='str'), + action_subject=dict(default="{TRIGGER.NAME}: {TRIGGER.STATUS}", type='str'), + action_message=dict(default="{TRIGGER.NAME}: {TRIGGER.STATUS}\r\n" + + "Last value: {ITEM.LASTVALUE}\r\n\r\n{TRIGGER.URL}", type='str'), + reply_subject=dict(default="{TRIGGER.NAME}: {TRIGGER.STATUS}", type='str'), + reply_message=dict(default="Trigger: {TRIGGER.NAME}\r\nTrigger status: {TRIGGER.STATUS}\r\n" + + "Trigger severity: {TRIGGER.SEVERITY}\r\nTrigger URL: {TRIGGER.URL}\r\n\r\n" + + "Item values:\r\n\r\n1. {ITEM.NAME1} ({HOST.NAME1}:{ITEM.KEY1}): " + + "{ITEM.VALUE1}\r\n2. {ITEM.NAME2} ({HOST.NAME2}:{ITEM.KEY2}): " + + "{ITEM.VALUE2}\r\n3. {ITEM.NAME3} ({HOST.NAME3}:{ITEM.KEY3}): " + + "{ITEM.VALUE3}", type='str'), + send_recovery=dict(default=False, type='bool'), + status=dict(default=None, type='str'), + escalation_time=dict(default=60, type='int'), + conditions_filter=dict(default=None, type='dict'), + operations=dict(default=None, type='list'), + state=dict(default='present', type='str'), + ), + #supports_check_mode=True + ) + + zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'], + module.params['zbx_user'], + module.params['zbx_password'], + module.params['zbx_debug'])) + + #Set the instance and the template for the rest of the calls + zbx_class_name = 'action' + state = module.params['state'] + + content = zapi.get_content(zbx_class_name, + 'get', + {'search': {'name': module.params['name']}, + 'selectFilter': 'extend', + 'selectOperations': 'extend', + }) + + #******# + # GET + #******# + if state == 'list': + module.exit_json(changed=False, results=content['result'], state="list") + + #******# + # DELETE + #******# + if state == 'absent': + if not exists(content): + module.exit_json(changed=False, state="absent") + + content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0]['itemid']]) + module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update + if state == 'present': + + conditions = get_action_conditions(zapi, module.params['event_source'], module.params['conditions_filter']) + operations = get_action_operations(zapi, module.params['operations']) + params = {'name': module.params['name'], + 'esc_period': module.params['escalation_time'], + 'eventsource': get_event_source(module.params['event_source']), + 'status': get_status(module.params['status']), + 'def_shortdata': module.params['action_subject'], + 'def_longdata': module.params['action_message'], + 'r_shortdata': module.params['reply_subject'], + 'r_longdata': module.params['reply_message'], + 'recovery_msg': get_send_recovery(module.params['send_recovery']), + 'filter': conditions, + 'operations': operations, + } + + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# + if not exists(content): + content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state='present') + + + ######## + # UPDATE + ######## + _ = params.pop('hostid', None) + differences = {} + zab_results = content['result'][0] + for key, value in params.items(): + + if key == 'operations': + ops = operation_differences(zab_results[key], value) + if ops: + differences[key] = ops + + elif key == 'filter': + filters = filter_differences(zab_results[key], value) + if filters: + differences[key] = filters + + elif zab_results[key] != value and zab_results[key] != str(value): + differences[key] = value + + if not differences: + module.exit_json(changed=False, results=zab_results, state="present") + + # We have differences and need to update. + # action update requires an id, filters, and operations + differences['actionid'] = zab_results['actionid'] + differences['operations'] = params['operations'] + differences['filter'] = params['filter'] + content = zapi.get_content(zbx_class_name, 'update', differences) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state="present") + + module.exit_json(failed=True, + changed=False, + results='Unknown state passed. %s' % state, + state="unknown") + +# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled +# import module snippets. This are required +from ansible.module_utils.basic import * + +main() diff --git a/roles/lib_zabbix/library/zbx_discoveryrule.py b/roles/lib_zabbix/library/zbx_discoveryrule.py index 71a0580c2..f52f350a5 100644 --- a/roles/lib_zabbix/library/zbx_discoveryrule.py +++ b/roles/lib_zabbix/library/zbx_discoveryrule.py @@ -85,6 +85,7 @@ def main(): Ansible module for zabbix discovery rules ''' + module = AnsibleModule( argument_spec=dict( zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), @@ -93,6 +94,7 @@ def main(): zbx_debug=dict(default=False, type='bool'), name=dict(default=None, type='str'), key=dict(default=None, type='str'), + description=dict(default=None, type='str'), interfaceid=dict(default=None, type='int'), ztype=dict(default='trapper', type='str'), delay=dict(default=60, type='int'), @@ -113,18 +115,27 @@ def main(): idname = "itemid" dname = module.params['name'] state = module.params['state'] + template = get_template(zapi, module.params['template_name']) # selectInterfaces doesn't appear to be working but is needed. content = zapi.get_content(zbx_class_name, 'get', {'search': {'name': dname}, + 'templateids': template['templateid'], #'selectDServices': 'extend', #'selectDChecks': 'extend', #'selectDhosts': 'dhostid', }) + + #******# + # GET + #******# if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + #******# + # DELETE + #******# if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") @@ -132,24 +143,37 @@ def main(): content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update if state == 'present': - template = get_template(zapi, module.params['template_name']) params = {'name': dname, 'key_': module.params['key'], 'hostid': template['templateid'], 'interfaceid': module.params['interfaceid'], 'lifetime': module.params['lifetime'], 'type': get_type(module.params['ztype']), + 'description': module.params['description'], } if params['type'] in [2, 5, 7, 11]: params.pop('interfaceid') + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# if not exists(content): - # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties + + ######## + # UPDATE + ######## differences = {} zab_results = content['result'][0] for key, value in params.items(): @@ -163,6 +187,10 @@ def main(): # We have differences and need to update differences[idname] = zab_results[idname] content = zapi.get_content(zbx_class_name, 'update', differences) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state="present") module.exit_json(failed=True, diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 2ccc21292..6faa82dfc 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -53,6 +53,8 @@ def get_value_type(value_type): vtype = 0 if 'int' in value_type: vtype = 3 + elif 'log' in value_type: + vtype = 2 elif 'char' in value_type: vtype = 1 elif 'str' in value_type: diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 24f85710d..e7fd6fa21 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -38,13 +38,14 @@ def exists(content, key='result'): return True -def get_rule_id(zapi, discoveryrule_name): +def get_rule_id(zapi, discoveryrule_key, templateid): '''get a discoveryrule by name ''' content = zapi.get_content('discoveryrule', 'get', - {'search': {'name': discoveryrule_name}, + {'search': {'key_': discoveryrule_key}, 'output': 'extend', + 'templateids': templateid, }) if not content['result']: return None @@ -53,6 +54,9 @@ def get_rule_id(zapi, discoveryrule_name): def get_template(zapi, template_name): '''get a template by name ''' + if not template_name: + return None + content = zapi.get_content('template', 'get', {'search': {'host': template_name}, @@ -124,16 +128,17 @@ def get_status(status): return _status -def get_app_ids(zapi, application_names): +def get_app_ids(zapi, application_names, templateid): ''' get application ids from names ''' app_ids = [] for app_name in application_names: - content = zapi.get_content('application', 'get', {'search': {'name': app_name}}) + content = zapi.get_content('application', 'get', {'filter': {'name': app_name}, 'templateids': templateid}) if content.has_key('result'): app_ids.append(content['result'][0]['applicationid']) return app_ids +# pylint: disable=too-many-branches def main(): ''' Ansible module for zabbix discovery rules @@ -147,16 +152,17 @@ def main(): zbx_debug=dict(default=False, type='bool'), name=dict(default=None, type='str'), key=dict(default=None, type='str'), + description=dict(default=None, type='str'), interfaceid=dict(default=None, type='int'), ztype=dict(default='trapper', type='str'), value_type=dict(default='float', type='str'), delay=dict(default=60, type='int'), lifetime=dict(default=30, type='int'), - template_name=dict(default=[], type='list'), state=dict(default='present', type='str'), status=dict(default='enabled', type='str'), - discoveryrule_name=dict(default=None, type='str'), applications=dict(default=[], type='list'), + template_name=dict(default=None, type='str'), + discoveryrule_key=dict(default=None, type='str'), ), #supports_check_mode=True ) @@ -169,20 +175,27 @@ def main(): #Set the instance and the template for the rest of the calls zbx_class_name = 'itemprototype' idname = "itemid" - dname = module.params['name'] state = module.params['state'] + template = get_template(zapi, module.params['template_name']) # selectInterfaces doesn't appear to be working but is needed. content = zapi.get_content(zbx_class_name, 'get', - {'search': {'name': dname}, + {'search': {'key_': module.params['key']}, 'selectApplications': 'applicationid', 'selectDiscoveryRule': 'itemid', - #'selectDhosts': 'dhostid', + 'templated': True, }) + + #******# + # GET + #******# if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + #******# + # DELETE + #******# if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") @@ -190,26 +203,39 @@ def main(): content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) module.exit_json(changed=True, results=content['result'], state="absent") + # Create and Update if state == 'present': - template = get_template(zapi, module.params['template_name']) - params = {'name': dname, + params = {'name': module.params['name'], 'key_': module.params['key'], 'hostid': template['templateid'], 'interfaceid': module.params['interfaceid'], - 'ruleid': get_rule_id(zapi, module.params['discoveryrule_name']), + 'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']), 'type': get_type(module.params['ztype']), 'value_type': get_value_type(module.params['value_type']), - 'applications': get_app_ids(zapi, module.params['applications']), + 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']), + 'description': module.params['description'], } + if params['type'] in [2, 5, 7, 8, 11, 15]: params.pop('interfaceid') + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# if not exists(content): - # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties + + #******# + # UPDATE + #******# differences = {} zab_results = content['result'][0] for key, value in params.items(): @@ -218,6 +244,11 @@ def main(): if value != zab_results['discoveryRule']['itemid']: differences[key] = value + elif key == 'applications': + app_ids = [app['applicationid'] for app in zab_results[key]] + if set(app_ids) - set(value): + differences[key] = value + elif zab_results[key] != value and zab_results[key] != str(value): differences[key] = value @@ -227,6 +258,10 @@ def main(): # We have differences and need to update differences[idname] = zab_results[idname] content = zapi.get_content(zbx_class_name, 'update', differences) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state="present") module.exit_json(failed=True, diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index a05de7e68..ab7731faa 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -74,6 +74,36 @@ def get_deps(zapi, deps): return results + +def get_trigger_status(inc_status): + ''' Determine the trigger's status + 0 is enabled + 1 is disabled + ''' + r_status = 0 + if inc_status == 'disabled': + r_status = 1 + + return r_status + +def get_template_id(zapi, template_name): + ''' + get related templates + ''' + template_ids = [] + app_ids = {} + # Fetch templates by name + content = zapi.get_content('template', + 'get', + {'search': {'host': template_name}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + template_ids.append(content['result'][0]['templateid']) + for app in content['result'][0]['applications']: + app_ids[app['name']] = app['applicationid'] + + return template_ids, app_ids + def main(): ''' Create a trigger in zabbix @@ -103,7 +133,9 @@ def main(): dependencies=dict(default=[], type='list'), priority=dict(default='avg', type='str'), url=dict(default=None, type='str'), + status=dict(default=None, type='str'), state=dict(default='present', type='str'), + template_name=dict(default=None, type='str'), ), #supports_check_mode=True ) @@ -119,11 +151,16 @@ def main(): state = module.params['state'] tname = module.params['name'] + templateid = None + if module.params['template_name']: + templateid, _ = get_template_id(zapi, module.params['template_name']) + content = zapi.get_content(zbx_class_name, 'get', {'filter': {'description': tname}, 'expandExpression': True, 'selectDependencies': 'triggerid', + 'templateids': templateid, }) # Get @@ -145,6 +182,7 @@ def main(): 'dependencies': get_deps(zapi, module.params['dependencies']), 'priority': get_priority(module.params['priority']), 'url': module.params['url'], + 'status': get_trigger_status(module.params['status']), } # Remove any None valued params @@ -156,6 +194,10 @@ def main(): if not exists(content): # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') ######## diff --git a/roles/lib_zabbix/library/zbx_triggerprototype.py b/roles/lib_zabbix/library/zbx_triggerprototype.py new file mode 100644 index 000000000..c1224b268 --- /dev/null +++ b/roles/lib_zabbix/library/zbx_triggerprototype.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python +''' +ansible module for zabbix triggerprototypes +''' +# vim: expandtab:tabstop=4:shiftwidth=4 +# +# Zabbix triggerprototypes ansible module +# +# +# Copyright 2015 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is in place because each module looks similar to each other. +# These need duplicate code as their behavior is very similar +# but different for each zabbix class. +# pylint: disable=duplicate-code + +# pylint: disable=import-error +from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection + +def exists(content, key='result'): + ''' Check if key exists in content or the size of content[key] > 0 + ''' + if not content.has_key(key): + return False + + if not content[key]: + return False + + return True + +def get_priority(priority): + ''' determine priority + ''' + prior = 0 + if 'info' in priority: + prior = 1 + elif 'warn' in priority: + prior = 2 + elif 'avg' == priority or 'ave' in priority: + prior = 3 + elif 'high' in priority: + prior = 4 + elif 'dis' in priority: + prior = 5 + + return prior + +def get_trigger_status(inc_status): + ''' Determine the trigger's status + 0 is enabled + 1 is disabled + ''' + r_status = 0 + if inc_status == 'disabled': + r_status = 1 + + return r_status + + +def main(): + ''' + Create a triggerprototype in zabbix + ''' + + module = AnsibleModule( + argument_spec=dict( + zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), + zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'), + zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), + zbx_debug=dict(default=False, type='bool'), + name=dict(default=None, type='str'), + expression=dict(default=None, type='str'), + description=dict(default=None, type='str'), + priority=dict(default='avg', type='str'), + url=dict(default=None, type='str'), + status=dict(default=None, type='str'), + state=dict(default='present', type='str'), + ), + #supports_check_mode=True + ) + + zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'], + module.params['zbx_user'], + module.params['zbx_password'], + module.params['zbx_debug'])) + + #Set the instance and the template for the rest of the calls + zbx_class_name = 'triggerprototype' + idname = "triggerid" + state = module.params['state'] + tname = module.params['name'] + + content = zapi.get_content(zbx_class_name, + 'get', + {'filter': {'description': tname}, + 'expandExpression': True, + 'selectDependencies': 'triggerid', + }) + + # Get + if state == 'list': + module.exit_json(changed=False, results=content['result'], state="list") + + # Delete + if state == 'absent': + if not exists(content): + module.exit_json(changed=False, state="absent") + content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) + module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update + if state == 'present': + params = {'description': tname, + 'comments': module.params['description'], + 'expression': module.params['expression'], + 'priority': get_priority(module.params['priority']), + 'url': module.params['url'], + 'status': get_trigger_status(module.params['status']), + } + + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# + if not exists(content): + # if we didn't find it, create it + content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state='present') + + ######## + # UPDATE + ######## + differences = {} + zab_results = content['result'][0] + for key, value in params.items(): + + if zab_results[key] != value and zab_results[key] != str(value): + differences[key] = value + + if not differences: + module.exit_json(changed=False, results=zab_results, state="present") + + # We have differences and need to update + differences[idname] = zab_results[idname] + content = zapi.get_content(zbx_class_name, 'update', differences) + module.exit_json(changed=True, results=content['result'], state="present") + + + module.exit_json(failed=True, + changed=False, + results='Unknown state passed. %s' % state, + state="unknown") + +# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled +# import module snippets. This are required +from ansible.module_utils.basic import * + +main() diff --git a/roles/lib_zabbix/library/zbx_user_media.py b/roles/lib_zabbix/library/zbx_user_media.py index 3f7760475..8895c78c3 100644 --- a/roles/lib_zabbix/library/zbx_user_media.py +++ b/roles/lib_zabbix/library/zbx_user_media.py @@ -54,8 +54,8 @@ def get_mtype(zapi, mtype): except ValueError: pass - content = zapi.get_content('mediatype', 'get', {'search': {'description': mtype}}) - if content.has_key['result'] and content['result']: + content = zapi.get_content('mediatype', 'get', {'filter': {'description': mtype}}) + if content.has_key('result') and content['result']: return content['result'][0]['mediatypeid'] return None @@ -63,7 +63,7 @@ def get_mtype(zapi, mtype): def get_user(zapi, user): ''' Get userids from user aliases ''' - content = zapi.get_content('user', 'get', {'search': {'alias': user}}) + content = zapi.get_content('user', 'get', {'filter': {'alias': user}}) if content['result']: return content['result'][0] @@ -104,15 +104,17 @@ def find_media(medias, user_media): ''' Find the user media in the list of medias ''' for media in medias: - if all([media[key] == user_media[key] for key in user_media.keys()]): + if all([media[key] == str(user_media[key]) for key in user_media.keys()]): return media return None -def get_active(in_active): +def get_active(is_active): '''Determine active value + 0 - enabled + 1 - disabled ''' active = 1 - if in_active: + if is_active: active = 0 return active @@ -128,6 +130,21 @@ def get_mediatype(zapi, mediatype, mediatype_desc): return mtypeid +def preprocess_medias(zapi, medias): + ''' Insert the correct information when processing medias ''' + for media in medias: + # Fetch the mediatypeid from the media desc (name) + if media.has_key('mediatype'): + media['mediatypeid'] = get_mediatype(zapi, mediatype=None, mediatype_desc=media.pop('mediatype')) + + media['active'] = get_active(media.get('active')) + media['severity'] = int(get_severity(media['severity'])) + + return medias + +# Disabling branching as the logic requires branches. +# I've also added a few safeguards which required more branches. +# pylint: disable=too-many-branches def main(): ''' Ansible zabbix module for mediatype @@ -166,11 +183,17 @@ def main(): # User media is fetched through the usermedia.get zbx_user_query = get_zbx_user_query_data(zapi, module.params['login']) - content = zapi.get_content('usermedia', 'get', zbx_user_query) - + content = zapi.get_content('usermedia', 'get', + {'userids': [uid for user, uid in zbx_user_query.items()]}) + ##### + # Get + ##### if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + ######## + # Delete + ######## if state == 'absent': if not exists(content) or len(content['result']) == 0: module.exit_json(changed=False, state="absent") @@ -178,13 +201,14 @@ def main(): if not module.params['login']: module.exit_json(failed=True, changed=False, results='Must specifiy a user login.', state="absent") - content = zapi.get_content(zbx_class_name, 'deletemedia', [content['result'][0][idname]]) + content = zapi.get_content(zbx_class_name, 'deletemedia', [res[idname] for res in content['result']]) if content.has_key('error'): module.exit_json(changed=False, results=content['error'], state="absent") module.exit_json(changed=True, results=content['result'], state="absent") + # Create and Update if state == 'present': active = get_active(module.params['active']) mtypeid = get_mediatype(zapi, module.params['mediatype'], module.params['mediatype_desc']) @@ -197,13 +221,21 @@ def main(): 'severity': int(get_severity(module.params['severity'])), 'period': module.params['period'], }] + else: + medias = preprocess_medias(zapi, medias) params = {'users': [zbx_user_query], 'medias': medias, 'output': 'extend', } + ######## + # Create + ######## if not exists(content): + if not params['medias']: + module.exit_json(changed=False, results=content['result'], state='present') + # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'addmedia', params) @@ -216,6 +248,9 @@ def main(): # If user params exists, check to see if they already exist in zabbix # if they exist, then return as no update # elif they do not exist, then take user params only + ######## + # Update + ######## diff = {'medias': [], 'users': {}} _ = [diff['medias'].append(media) for media in params['medias'] if not find_media(content['result'], media)] @@ -225,6 +260,9 @@ def main(): for user in params['users']: diff['users']['userid'] = user['userid'] + # Medias have no real unique key so therefore we need to make it like the incoming user's request + diff['medias'] = medias + # We have differences and need to update content = zapi.get_content(zbx_class_name, 'updatemedia', diff) diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index fd0cdd46f..41381e76c 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -9,7 +9,8 @@ - set_fact: - lzbx_applications: "{{ template.zitems | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" + lzbx_item_applications: "{{ template.zitems | default([], True) | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" + lzbx_itemprototype_applications: "{{ template.zitemprototypes | default([], True) | oo_select_keys_from_list(['applications']) | oo_flatten | unique }}" - name: Create Application zbx_application: @@ -18,9 +19,11 @@ zbx_password: "{{ password }}" name: "{{ item }}" template_name: "{{ template.name }}" - with_items: lzbx_applications + with_items: + - "{{ lzbx_item_applications }}" + - "{{ lzbx_itemprototype_applications }}" register: created_application - when: template.zitems is defined + when: template.zitems is defined or template.zitemprototypes is defined - name: Create Items zbx_item: @@ -52,3 +55,44 @@ url: "{{ item.url | default(None, True) }}" with_items: template.ztriggers when: template.ztriggers is defined + +- name: Create Discoveryrules + zbx_discoveryrule: + zbx_server: "{{ server }}" + zbx_user: "{{ user }}" + zbx_password: "{{ password }}" + name: "{{ item.name }}" + key: "{{ item.key }}" + lifetime: "{{ item.lifetime }}" + template_name: "{{ template.name }}" + description: "{{ item.description | default('', True) }}" + with_items: template.zdiscoveryrules + when: template.zdiscoveryrules is defined + +- name: Create Item Prototypes + zbx_itemprototype: + zbx_server: "{{ server }}" + zbx_user: "{{ user }}" + zbx_password: "{{ password }}" + name: "{{ item.name }}" + key: "{{ item.key }}" + discoveryrule_key: "{{ item.discoveryrule_key }}" + value_type: "{{ item.value_type }}" + template_name: "{{ template.name }}" + applications: "{{ item.applications }}" + description: "{{ item.description | default('', True) }}" + with_items: template.zitemprototypes + when: template.zitemprototypes is defined + +- name: Create Trigger Prototypes + zbx_triggerprototype: + zbx_server: "{{ server }}" + zbx_user: "{{ user }}" + zbx_password: "{{ password }}" + name: "{{ item.name }}" + expression: "{{ item.expression }}" + url: "{{ item.url | default('', True) }}" + priority: "{{ item.priority | default('average', True) }}" + description: "{{ item.description | default('', True) }}" + with_items: template.ztriggerprototypes + when: template.ztriggerprototypes is defined diff --git a/roles/openshift_examples/defaults/main.yml b/roles/openshift_examples/defaults/main.yml index 3246790aa..7d4f100e3 100644 --- a/roles/openshift_examples/defaults/main.yml +++ b/roles/openshift_examples/defaults/main.yml @@ -14,3 +14,5 @@ db_templates_base: "{{ examples_base }}/db-templates" xpaas_image_streams: "{{ examples_base }}/xpaas-streams/jboss-image-streams.json" xpaas_templates_base: "{{ examples_base }}/xpaas-templates" quickstarts_base: "{{ examples_base }}/quickstart-templates" + +openshift_examples_import_command: "create" diff --git a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json index 0497e6824..5df36ccc2 100644 --- a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json +++ b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json @@ -6,10 +6,10 @@ "iconClass" : "icon-jboss", "description": "Application template for EAP 6 applications built using STI." }, - "name": "eap6-basic-sti" + "name": "eap6-https-sti" }, "labels": { - "template": "eap6-basic-sti" + "template": "eap6-https-sti" }, "parameters": [ { diff --git a/roles/openshift_examples/tasks/main.yml b/roles/openshift_examples/tasks/main.yml index bfc6dfb0a..3a829a4c6 100644 --- a/roles/openshift_examples/tasks/main.yml +++ b/roles/openshift_examples/tasks/main.yml @@ -7,7 +7,7 @@ # RHEL and Centos image streams are mutually exclusive - name: Import RHEL streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ rhel_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ rhel_image_streams }} when: openshift_examples_load_rhel register: oex_import_rhel_streams failed_when: "'already exists' not in oex_import_rhel_streams.stderr and oex_import_rhel_streams.rc != 0" @@ -15,7 +15,7 @@ - name: Import Centos Image streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ centos_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ centos_image_streams }} when: openshift_examples_load_centos | bool register: oex_import_centos_streams failed_when: "'already exists' not in oex_import_centos_streams.stderr and oex_import_centos_streams.rc != 0" @@ -23,7 +23,7 @@ - name: Import db templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ db_templates_base }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ db_templates_base }} when: openshift_examples_load_db_templates | bool register: oex_import_db_templates failed_when: "'already exists' not in oex_import_db_templates.stderr and oex_import_db_templates.rc != 0" @@ -31,7 +31,7 @@ - name: Import quickstart-templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ quickstarts_base }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ quickstarts_base }} when: openshift_examples_load_quickstarts register: oex_import_quickstarts failed_when: "'already exists' not in oex_import_quickstarts.stderr and oex_import_quickstarts.rc != 0" @@ -40,7 +40,7 @@ - name: Import xPaas image streams command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ xpaas_image_streams }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ xpaas_image_streams }} when: openshift_examples_load_xpaas | bool register: oex_import_xpaas_streams failed_when: "'already exists' not in oex_import_xpaas_streams.stderr and oex_import_xpaas_streams.rc != 0" @@ -48,7 +48,7 @@ - name: Import xPaas templates command: > - {{ openshift.common.client_binary }} create -n openshift -f {{ xpaas_templates_base }} + {{ openshift.common.client_binary }} {{ openshift_examples_import_command }} -n openshift -f {{ xpaas_templates_base }} when: openshift_examples_load_xpaas | bool register: oex_import_xpaas_templates failed_when: "'already exists' not in oex_import_xpaas_templates.stderr and oex_import_xpaas_templates.rc != 0" diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 60d1226d4..69bb49c9b 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -296,9 +296,8 @@ def set_fluentd_facts_if_unset(facts): """ if 'common' in facts: - deployment_type = facts['common']['deployment_type'] if 'use_fluentd' not in facts['common']: - use_fluentd = True if deployment_type == 'online' else False + use_fluentd = False facts['common']['use_fluentd'] = use_fluentd return facts @@ -461,7 +460,7 @@ def set_deployment_facts_if_unset(facts): if 'service_type' not in facts['common']: service_type = 'atomic-openshift' if deployment_type == 'origin': - service_type = 'openshift' + service_type = 'origin' elif deployment_type in ['enterprise', 'online']: service_type = 'openshift' facts['common']['service_type'] = service_type @@ -469,26 +468,23 @@ def set_deployment_facts_if_unset(facts): config_base = '/etc/origin' if deployment_type in ['enterprise', 'online']: config_base = '/etc/openshift' - elif deployment_type == 'origin': - config_base = '/etc/openshift' facts['common']['config_base'] = config_base if 'data_dir' not in facts['common']: data_dir = '/var/lib/origin' if deployment_type in ['enterprise', 'online']: data_dir = '/var/lib/openshift' - elif deployment_type == 'origin': - data_dir = '/var/lib/openshift' facts['common']['data_dir'] = data_dir + facts['common']['version'] = get_openshift_version() for role in ('master', 'node'): if role in facts: deployment_type = facts['common']['deployment_type'] if 'registry_url' not in facts[role]: - registry_url = 'aos3/aos-${component}:${version}' - if deployment_type in ['enterprise', 'online']: + registry_url = 'openshift/origin-${component}:${version}' + if deployment_type in ['enterprise', 'online', 'openshift-enterprise']: registry_url = 'openshift3/ose-${component}:${version}' - elif deployment_type == 'origin': - registry_url = 'openshift/origin-${component}:${version}' + elif deployment_type == 'atomic-enterprise': + registry_url = 'aep3/aep-${component}:${version}' facts[role]['registry_url'] = registry_url return facts @@ -603,6 +599,21 @@ def get_current_config(facts): return current_config +def get_openshift_version(): + """ Get current version of openshift on the host + + Returns: + version: the current openshift version + """ + version = '' + + if os.path.isfile('/usr/bin/openshift'): + _, output, _ = module.run_command(['/usr/bin/openshift', 'version']) + versions = dict(e.split(' v') for e in output.splitlines()) + version = versions.get('openshift', '') + + #TODO: acknowledge the possility of a containerized install + return version def apply_provider_facts(facts, provider_facts): """ Apply provider facts to supplied facts dict @@ -648,7 +659,7 @@ def merge_facts(orig, new): facts = dict() for key, value in orig.iteritems(): if key in new: - if isinstance(value, dict): + if isinstance(value, dict) and isinstance(new[key], dict): facts[key] = merge_facts(value, new[key]) else: facts[key] = copy.copy(new[key]) diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index fd3d20800..6301d4fc0 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -1,5 +1,5 @@ --- -- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 +- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 and not 1.9.0.1 assert: that: - ansible_version | version_compare('1.8.0', 'ge') diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index 7c4f45ce6..637e494ea 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ b/roles/openshift_manage_node/tasks/main.yml @@ -1,21 +1,21 @@ - name: Wait for Node Registration command: > - {{ openshift.common.client_binary }} get node {{ item }} + {{ openshift.common.client_binary }} get node {{ item | lower }} register: omd_get_node until: omd_get_node.rc == 0 - retries: 10 + retries: 20 delay: 5 with_items: openshift_nodes - name: Set node schedulability command: > - {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} + {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname | lower }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} with_items: - "{{ openshift_node_vars }}" - name: Label nodes command: > - {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname }} {{ item.openshift.node.labels | oo_combine_dict }} + {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname | lower }} {{ item.openshift.node.labels | oo_combine_dict }} with_items: - "{{ openshift_node_vars }}" when: "'labels' in item.openshift.node and item.openshift.node.labels != {}" diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 29b56656f..73c04cb08 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -100,6 +100,7 @@ template: dest: "{{ openshift_master_scheduler_conf }}" src: scheduler.json.j2 + backup: true notify: - restart master @@ -129,6 +130,7 @@ template: dest: "{{ openshift_master_config_file }}" src: master.yaml.v1.j2 + backup: true notify: - restart master diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index 500690523..cc1dee13d 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -87,7 +87,9 @@ masterPublicURL: {{ openshift.master.public_api_url }} networkConfig: clusterNetworkCIDR: {{ openshift.master.sdn_cluster_network_cidr }} hostSubnetLength: {{ openshift.master.sdn_host_subnet_length }} + {% if openshift.common.use_openshift_sdn %} networkPluginName: {{ openshift.common.sdn_network_plugin_name }} + {% endif %} # serviceNetworkCIDR must match kubernetesMasterConfig.servicesSubnet serviceNetworkCIDR: {{ openshift.master.portal_net }} {% include 'v1_partials/oauthConfig.j2' %} diff --git a/roles/openshift_master/templates/scheduler.json.j2 b/roles/openshift_master/templates/scheduler.json.j2 index 835f2383e..cb5f43bb2 100644 --- a/roles/openshift_master/templates/scheduler.json.j2 +++ b/roles/openshift_master/templates/scheduler.json.j2 @@ -1,4 +1,6 @@ { + "kind": "Policy", + "apiVersion": "v1", "predicates": [ {"name": "MatchNodeSelector"}, {"name": "PodFitsResources"}, diff --git a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 index 72889bc29..8a4f5a746 100644 --- a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 +++ b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 @@ -80,6 +80,7 @@ oauthConfig: provider: {{ identity_provider_config(identity_provider) }} {%- endfor %} + masterCA: ca.crt masterPublicURL: {{ openshift.master.public_api_url }} masterURL: {{ openshift.master.api_url }} sessionConfig: diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 1986b631e..d45dd8073 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -22,7 +22,7 @@ deployment_type: "{{ openshift_deployment_type }}" - role: node local_facts: - labels: "{{ openshift_node_labels | default(none) }}" + labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}" annotations: "{{ openshift_node_annotations | default(none) }}" registry_url: "{{ oreg_url | default(none) }}" debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}" @@ -47,6 +47,7 @@ template: dest: "{{ openshift_node_config_file }}" src: node.yaml.v1.j2 + backup: true notify: - restart node diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2 index 07d80f99b..4931d127e 100644 --- a/roles/openshift_node/templates/node.yaml.v1.j2 +++ b/roles/openshift_node/templates/node.yaml.v1.j2 @@ -12,13 +12,17 @@ kind: NodeConfig kubeletArguments: {{ openshift.node.kubelet_args | to_json }} {% endif %} masterKubeConfig: system:node:{{ openshift.common.hostname }}.kubeconfig +{% if openshift.common.use_openshift_sdn %} networkPluginName: {{ openshift.common.sdn_network_plugin_name }} +{% endif %} # networkConfig struct introduced in origin 1.0.6 and OSE 3.0.2 which # deprecates networkPluginName above. The two should match. networkConfig: mtu: {{ openshift.node.sdn_mtu }} +{% if openshift.common.use_openshift_sdn %} networkPluginName: {{ openshift.common.sdn_network_plugin_name }} -nodeName: {{ openshift.common.hostname }} +{% endif %} +nodeName: {{ openshift.common.hostname | lower }} podManifestConfig: servingInfo: bindAddress: 0.0.0.0:10250 diff --git a/roles/openshift_serviceaccounts/tasks/main.yml b/roles/openshift_serviceaccounts/tasks/main.yml index 9665d0a72..d93a25a21 100644 --- a/roles/openshift_serviceaccounts/tasks/main.yml +++ b/roles/openshift_serviceaccounts/tasks/main.yml @@ -23,4 +23,4 @@ with_items: accounts - name: Apply new scc rules for service accounts - command: "{{ openshift.common.client_binary }} replace -f /tmp/scc.yaml" + command: "{{ openshift.common.client_binary }} update -f /tmp/scc.yaml" diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index f9672b9c4..a503b24d7 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -11,6 +11,10 @@ - include_vars: template_os_linux.yml - include_vars: template_docker.yml - include_vars: template_openshift_master.yml +- include_vars: template_openshift_node.yml +- include_vars: template_ops_tools.yml +- include_vars: template_app_zabbix_server.yml +- include_vars: template_app_zabbix_agent.yml - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -43,3 +47,35 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + +- name: Include Template Openshift Node + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_openshift_node }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template Ops Tools + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_ops_tools }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Server + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_server }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Agent + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_agent }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml new file mode 100644 index 000000000..06c4eda8b --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -0,0 +1,23 @@ +--- +g_template_app_zabbix_agent: + name: Template App Zabbix Agent + zitems: + - key: agent.hostname + applications: + - Zabbix agent + value_type: character + zabbix_type: '0' + + - key: agent.ping + applications: + - Zabbix agent + description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. + value_type: int + zabbix_type: '0' + + ztriggers: + - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' + description: Zabbix agent is unreachable for 15 minutes. + expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1' + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..dace2aa29 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,408 @@ +--- +g_template_app_zabbix_server: + name: Template App Zabbix Server + zitems: + - key: housekeeper_creates + applications: + - Zabbix server + description: A simple count of the number of partition creates output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_drops + applications: + - Zabbix server + description: A simple count of the number of partition drops output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_errors + applications: + - Zabbix server + description: A simple count of the number of errors output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_total + applications: + - Zabbix server + description: A simple count of the total number of lines output by the housekeeper + script. + units: '' + value_type: int + zabbix_type: '2' + + - key: zabbix[process,alerter,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,configuration syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,db watchdog,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,discoverer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,escalator,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,history syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,housekeeper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,http poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,icmp pinger,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,ipmi poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,java poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,node watcher,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,proxy poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,self-monitoring,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,snmp trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,timer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,unreachable poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[queue,10m] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[queue] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[rcache,buffer,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,history,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,text,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,trend,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,values] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + ztriggers: + - description: "There has been unexpected output while running the housekeeping script\ + \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ + \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ + \ for more details." + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' + name: Unexpected output in Zabbix DB Housekeeping + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + + - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' + name: Errors during Zabbix DB Housekeeping + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' + name: Zabbix alerter processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' + name: Zabbix configuration syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' + name: Zabbix db watchdog processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' + name: Zabbix discoverer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' + name: Zabbix escalator processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' + name: Zabbix history syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' + name: Zabbix housekeeper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' + name: Zabbix http poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' + name: Zabbix icmp pinger processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' + name: Zabbix ipmi poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' + name: Zabbix java poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' + name: Zabbix node watcher processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' + name: Zabbix poller processes more than 75% busy + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' + name: Zabbix proxy poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' + name: Zabbix self-monitoring processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' + name: Zabbix snmp trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: Timer processes usually are busy because they have to process time + based trigger functions + expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' + name: Zabbix timer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' + name: Zabbix trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' + name: Zabbix unreachable poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: "This alert generally indicates a performance problem or a problem\ + \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ + \ is Administration > Queue. Be sure to check the general view and the per-proxy\ + \ view." + expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' + name: More than 1000 items having missing data for more than 10 minutes + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + + - description: Consider increasing CacheSize in the zabbix_server.conf configuration + file + expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' + name: Less than 5% free in the configuration cache + priority: info + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' + name: Less than 25% free in the history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' + name: Less than 25% free in the text history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' + name: Less than 25% free in the trends cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 728423ac1..1de4fefbb 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -6,8 +6,53 @@ g_template_openshift_master: applications: - Openshift Master key: create_app + + - key: openshift.master.process.count + description: Shows number of master processes running + type: int + applications: + - Openshift Master + + - key: openshift.master.user.count + description: Shows number of users in a cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pod.running.count + description: Shows number of pods running + type: int + applications: + - Openshift Master + + - key: openshift.project.counter + description: Shows number of projects on a cluster + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg + + - name: 'Openshift Master process not running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Too many Openshift Master processes running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Number of users for Openshift Master on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info + + - name: 'There are no projects running on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.project.counter.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml new file mode 100644 index 000000000..ce28b1048 --- /dev/null +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -0,0 +1,44 @@ +--- +g_template_openshift_node: + name: Template Openshift Node + zitems: + - key: openshift.node.process.count + description: Shows number of OpenShift Node processes running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.pids.count + description: Shows number of ovs process ids running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.ports.count + description: Shows number of OVS ports defined + type: int + applications: + - Openshift Node + + ztriggers: + - name: 'Openshift Node process not running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'Too many Openshift Node processes running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'OVS may not be running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'Number of OVS ports is 0 on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + diff --git a/roles/os_zabbix/vars/template_ops_tools.yml b/roles/os_zabbix/vars/template_ops_tools.yml new file mode 100644 index 000000000..d1b8a2514 --- /dev/null +++ b/roles/os_zabbix/vars/template_ops_tools.yml @@ -0,0 +1,23 @@ +--- +g_template_ops_tools: + name: Template Operations Tools + zdiscoveryrules: + - name: disc.ops.runner + key: disc.ops.runner + lifetime: 1 + description: "Dynamically register operations runner items" + + zitemprototypes: + - discoveryrule_key: disc.ops.runner + name: "Exit code of ops-runner[{#OSO_COMMAND}]" + key: "disc.ops.runner.command.exitcode[{#OSO_COMMAND}]" + value_type: int + description: "The exit code of the command run from ops-runner" + applications: + - Ops Runner + + ztriggerprototypes: + - name: 'ops-runner[{#OSO_COMMAND}]: non-zero exit code on {HOST.NAME}' + expression: '{Template Operations Tools:disc.ops.runner.command.exitcode[{#OSO_COMMAND}].last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_ops_runner_command.asciidoc' + priority: average diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 3173c79b2..69432273f 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -10,17 +10,20 @@ g_template_os_linux: - key: kernel.all.cpu.wait.total applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.cpu.irq.hard applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.cpu.idle applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.uname.distro applications: @@ -35,7 +38,8 @@ g_template_os_linux: - key: kernel.all.cpu.irq.soft applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.15_minute applications: @@ -45,7 +49,8 @@ g_template_os_linux: - key: kernel.all.cpu.sys applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.5_minute applications: @@ -55,7 +60,8 @@ g_template_os_linux: - key: kernel.all.cpu.nice applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.load.1_minute applications: @@ -75,7 +81,8 @@ g_template_os_linux: - key: kernel.all.cpu.user applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.uname.machine applications: @@ -90,7 +97,8 @@ g_template_os_linux: - key: kernel.all.cpu.steal applications: - Kernel - value_type: int + value_type: float + units: '%' - key: kernel.all.pswitch applications: @@ -180,38 +188,51 @@ g_template_os_linux: multiplier: 1024 units: B - # Disk items - - key: filesys.full.xvda2 + zdiscoveryrules: + - name: disc.filesys + key: disc.filesys + lifetime: 1 + description: "Dynamically register the filesystems" + + zitemprototypes: + - discoveryrule_key: disc.filesys + name: "disc.filesys.full.{#OSO_FILESYS}" + key: "disc.filesys.full[{#OSO_FILESYS}]" + value_type: float + description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full" applications: - Disk - value_type: float - - key: filesys.full.xvda3 + - discoveryrule_key: disc.filesys + name: "Percentage of used inodes on {#OSO_FILESYS}" + key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]" + value_type: float + description: "PCP derived value of percentage of used inodes on a filesystem." applications: - Disk - value_type: float - ztriggers: - - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' + ztriggerprototypes: + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + ztriggers: - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' @@ -222,3 +243,18 @@ g_template_os_linux: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024' + + # CPU Utilization # + - name: 'CPU idle less than 5% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: high + description: 'CPU is less than 5% idle' + + - name: 'CPU idle less than 10% on {HOST.NAME}' + expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' + priority: warn + description: 'CPU is less than 10% idle' + dependencies: + - 'CPU idle less than 5% on {HOST.NAME}' |