diff options
33 files changed, 740 insertions, 133 deletions
diff --git a/README_GCE.md b/README_GCE.md index f6c5138c1..50f8ade70 100644 --- a/README_GCE.md +++ b/README_GCE.md @@ -39,6 +39,13 @@ Create a gce.ini file for GCE * gce_service_account_pem_file_path - Full path from previous steps * gce_project_id - Found in "Projects", it list all the gce projects you are associated with. The page lists their "Project Name" and "Project ID". You want the "Project ID" +Mandatory customization variables (check the values according to your tenant): +* zone = europe-west1-d +* network = default +* gce_machine_type = n1-standard-2 +* gce_machine_image = preinstalled-slave-50g-v5 + + 1. vi ~/.gce/gce.ini 1. make the contents look like this: ``` @@ -46,11 +53,15 @@ Create a gce.ini file for GCE gce_service_account_email_address = long...@developer.gserviceaccount.com gce_service_account_pem_file_path = /full/path/to/project_id-gce_key_hash.pem gce_project_id = project_id +zone = europe-west1-d +network = default +gce_machine_type = n1-standard-2 +gce_machine_image = preinstalled-slave-50g-v5 + ``` -1. Setup a sym link so that gce.py will pick it up (link must be in same dir as gce.py) +1. Define the environment variable GCE_INI_PATH so gce.py can pick it up and bin/cluster can also read it ``` - cd openshift-ansible/inventory/gce - ln -s ~/.gce/gce.ini gce.ini +export GCE_INI_PATH=~/.gce/gce.ini ``` diff --git a/README_libvirt.md b/README_libvirt.md index 1a710ff3b..3f8bbb5f0 100644 --- a/README_libvirt.md +++ b/README_libvirt.md @@ -94,7 +94,8 @@ dns=dnsmasq - Configure dnsmasq to use the Virtual Network router for example.com: ```sh -sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf server=/example.com/192.168.55.1 +sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf +server=/example.com/192.168.55.1 ``` Test The Setup diff --git a/bin/cluster b/bin/cluster index 96dcf01e8..59a6755d3 100755 --- a/bin/cluster +++ b/bin/cluster @@ -137,10 +137,14 @@ class Cluster(object): """ config = ConfigParser.ConfigParser() if 'gce' == provider: - config.readfp(open('inventory/gce/hosts/gce.ini')) - - for key in config.options('gce'): - os.environ[key] = config.get('gce', key) + gce_ini_default_path = os.path.join( + 'inventory/gce/hosts/gce.ini') + gce_ini_path = os.environ.get('GCE_INI_PATH', gce_ini_default_path) + if os.path.exists(gce_ini_path): + config.readfp(open(gce_ini_path)) + + for key in config.options('gce'): + os.environ[key] = config.get('gce', key) inventory = '-i inventory/gce/hosts' elif 'aws' == provider: diff --git a/inventory/gce/hosts/gce.py b/inventory/gce/hosts/gce.py index 3403f735e..6ed12e011 100755 --- a/inventory/gce/hosts/gce.py +++ b/inventory/gce/hosts/gce.py @@ -120,6 +120,7 @@ class GceInventory(object): os.path.dirname(os.path.realpath(__file__)), "gce.ini") gce_ini_path = os.environ.get('GCE_INI_PATH', gce_ini_default_path) + # Create a ConfigParser. # This provides empty defaults to each key, so that environment # variable configuration (as opposed to INI configuration) is able @@ -173,6 +174,7 @@ class GceInventory(object): args[1] = os.environ.get('GCE_PEM_FILE_PATH', args[1]) kwargs['project'] = os.environ.get('GCE_PROJECT', kwargs['project']) + # Retrieve and return the GCE driver. gce = get_driver(Provider.GCE)(*args, **kwargs) gce.connection.user_agent_append( @@ -211,7 +213,8 @@ class GceInventory(object): 'gce_image': inst.image, 'gce_machine_type': inst.size, 'gce_private_ip': inst.private_ips[0], - 'gce_public_ip': inst.public_ips[0], + # Hosts don't always have a public IP name + #'gce_public_ip': inst.public_ips[0], 'gce_name': inst.name, 'gce_description': inst.extra['description'], 'gce_status': inst.extra['status'], @@ -219,8 +222,8 @@ class GceInventory(object): 'gce_tags': inst.extra['tags'], 'gce_metadata': md, 'gce_network': net, - # Hosts don't have a public name, so we add an IP - 'ansible_ssh_host': inst.public_ips[0] + # Hosts don't always have a public IP name + #'ansible_ssh_host': inst.public_ips[0] } def get_instance(self, instance_name): diff --git a/inventory/openstack/hosts/nova.py b/inventory/openstack/hosts/nova.py index d5bd8d1ee..3197a57bc 100755 --- a/inventory/openstack/hosts/nova.py +++ b/inventory/openstack/hosts/nova.py @@ -34,7 +34,7 @@ except ImportError: # executed with no parameters, return the list of # all groups and hosts -NOVA_CONFIG_FILES = [os.getcwd() + "/nova.ini", +NOVA_CONFIG_FILES = [os.path.join(os.path.dirname(os.path.realpath(__file__)), "nova.ini"), os.path.expanduser(os.environ.get('ANSIBLE_CONFIG', "~/nova.ini")), "/etc/ansible/nova.ini"] diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml index c9ae923bb..b6a2d2f26 100644 --- a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml +++ b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml @@ -27,9 +27,8 @@ gather_facts: no vars: - cli_volume_type: io1 + cli_volume_type: gp2 cli_volume_size: 30 - cli_volume_iops: "{{ 30 * cli_volume_size }}" pre_tasks: - fail: @@ -104,7 +103,6 @@ volume_size: "{{ cli_volume_size | default(30, True)}}" volume_type: "{{ cli_volume_type }}" device_name: /dev/xvdb - iops: "{{ 30 * cli_volume_size }}" register: vol - debug: var=vol @@ -142,10 +140,3 @@ - debug: var=dockerstart - - name: Wait for docker to stabilize - pause: - seconds: 30 - - # leaving off the '-t' for docker exec. With it, it doesn't work with ansible and tty support - - name: update zabbix docker items - command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py diff --git a/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml index ef9b45abd..63d473146 100644 --- a/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml +++ b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml @@ -172,7 +172,7 @@ - name: pvmove onto new volume command: "pvmove {{ docker_pv_name.stdout }} /dev/xvdc1" - async: 3600 + async: 43200 poll: 10 - name: Remove the old docker drive from the volume group diff --git a/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml b/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml new file mode 100644 index 000000000..01d70a1a6 --- /dev/null +++ b/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml @@ -0,0 +1,15 @@ +--- +- set_fact: k8s_type=infra +- set_fact: sub_host_type="{{ type }}" +- set_fact: number_infra="{{ count }}" + +- name: Generate infra instance names(s) + set_fact: + scratch_name: "{{ cluster_id }}-{{ k8s_type }}-{{ sub_host_type }}-{{ '%05x' | format(1048576 | random) }}" + register: infra_names_output + with_sequence: count={{ number_infra }} + +- set_fact: + infra_names: "{{ infra_names_output.results | default([], true) + | oo_collect('ansible_facts') + | oo_collect('scratch_name') }}" diff --git a/playbooks/gce/openshift-cluster/config.yml b/playbooks/gce/openshift-cluster/config.yml index fd5dfcc72..7bd3f1a56 100644 --- a/playbooks/gce/openshift-cluster/config.yml +++ b/playbooks/gce/openshift-cluster/config.yml @@ -10,6 +10,8 @@ - set_fact: g_ssh_user_tmp: "{{ deployment_vars[deployment_type].ssh_user }}" g_sudo_tmp: "{{ deployment_vars[deployment_type].sudo }}" + use_sdn: "{{ do_we_use_openshift_sdn }}" + sdn_plugin: "{{ sdn_network_plugin }}" - include: ../../common/openshift-cluster/config.yml vars: @@ -22,3 +24,5 @@ openshift_debug_level: 2 openshift_deployment_type: "{{ deployment_type }}" openshift_hostname: "{{ gce_private_ip }}" + openshift_use_openshift_sdn: "{{ hostvars.localhost.use_sdn }}" + os_sdn_network_plugin_name: "{{ hostvars.localhost.sdn_plugin }}" diff --git a/playbooks/gce/openshift-cluster/join_node.yml b/playbooks/gce/openshift-cluster/join_node.yml new file mode 100644 index 000000000..0dfa3e9d7 --- /dev/null +++ b/playbooks/gce/openshift-cluster/join_node.yml @@ -0,0 +1,49 @@ +--- +- name: Populate oo_hosts_to_update group + hosts: localhost + gather_facts: no + vars_files: + - vars.yml + tasks: + - name: Evaluate oo_hosts_to_update + add_host: + name: "{{ node_ip }}" + groups: oo_hosts_to_update + ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" + ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" + +- include: ../../common/openshift-cluster/update_repos_and_packages.yml + +- name: Populate oo_masters_to_config host group + hosts: localhost + gather_facts: no + vars_files: + - vars.yml + tasks: + - name: Evaluate oo_nodes_to_config + add_host: + name: "{{ node_ip }}" + ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" + ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" + groups: oo_nodes_to_config + + - name: Evaluate oo_first_master + add_host: + name: "{{ groups['tag_env-host-type-' ~ cluster_id ~ '-openshift-master'][0] }}" + ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" + ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" + groups: oo_first_master + when: "'tag_env-host-type-{{ cluster_id }}-openshift-master' in groups" + +#- include: config.yml +- include: ../../common/openshift-node/config.yml + vars: + openshift_cluster_id: "{{ cluster_id }}" + openshift_debug_level: 4 + openshift_deployment_type: "{{ deployment_type }}" + openshift_hostname: "{{ ansible_default_ipv4.address }}" + openshift_use_openshift_sdn: true + openshift_node_labels: "{{ lookup('oo_option', 'openshift_node_labels') }} " + os_sdn_network_plugin_name: "redhat/openshift-ovs-subnet" + osn_cluster_dns_domain: "{{ hostvars[groups.oo_first_master.0].openshift.dns.domain }}" + osn_cluster_dns_ip: "{{ hostvars[groups.oo_first_master.0].openshift.dns.ip }}" diff --git a/playbooks/gce/openshift-cluster/launch.yml b/playbooks/gce/openshift-cluster/launch.yml index 7a3b80da0..94e57fe4e 100644 --- a/playbooks/gce/openshift-cluster/launch.yml +++ b/playbooks/gce/openshift-cluster/launch.yml @@ -28,33 +28,33 @@ type: "{{ k8s_type }}" g_sub_host_type: "{{ sub_host_type }}" - - include: ../../common/openshift-cluster/set_node_launch_facts_tasks.yml - vars: - type: "infra" - count: "{{ num_infra }}" - - include: tasks/launch_instances.yml - vars: - instances: "{{ infra_names }}" - cluster: "{{ cluster_id }}" - type: "{{ k8s_type }}" - g_sub_host_type: "{{ sub_host_type }}" - - - set_fact: - a_infra: "{{ infra_names[0] }}" - - add_host: name={{ a_infra }} groups=service_master - +# - include: ../../common/openshift-cluster/set_infra_launch_facts_tasks.yml +# vars: +# type: "infra" +# count: "{{ num_infra }}" +# - include: tasks/launch_instances.yml +# vars: +# instances: "{{ infra_names }}" +# cluster: "{{ cluster_id }}" +# type: "{{ k8s_type }}" +# g_sub_host_type: "{{ sub_host_type }}" +# +# - set_fact: +# a_infra: "{{ infra_names[0] }}" +# - add_host: name={{ a_infra }} groups=service_master +# - include: update.yml - -- name: Deploy OpenShift Services - hosts: service_master - connection: ssh - gather_facts: yes - roles: - - openshift_registry - - openshift_router - -- include: ../../common/openshift-cluster/create_services.yml - vars: - g_svc_master: "{{ service_master }}" +# +#- name: Deploy OpenShift Services +# hosts: service_master +# connection: ssh +# gather_facts: yes +# roles: +# - openshift_registry +# - openshift_router +# +#- include: ../../common/openshift-cluster/create_services.yml +# vars: +# g_svc_master: "{{ service_master }}" - include: list.yml diff --git a/playbooks/gce/openshift-cluster/list.yml b/playbooks/gce/openshift-cluster/list.yml index 5ba0f5a48..53b2b9a5e 100644 --- a/playbooks/gce/openshift-cluster/list.yml +++ b/playbooks/gce/openshift-cluster/list.yml @@ -14,11 +14,11 @@ groups: oo_list_hosts ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}" ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" - with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) + with_items: groups[scratch_group] | default([], true) | difference(['localhost']) | difference(groups.status_terminated | default([], true)) - name: List instance(s) hosts: oo_list_hosts gather_facts: no tasks: - debug: - msg: "public ip:{{ hostvars[inventory_hostname].gce_public_ip }} private ip:{{ hostvars[inventory_hostname].gce_private_ip }}" + msg: "private ip:{{ hostvars[inventory_hostname].gce_private_ip }}" diff --git a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml index 6307ecc27..e300b5b5a 100644 --- a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml @@ -10,33 +10,38 @@ service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" project_id: "{{ lookup('env', 'gce_project_id') }}" + zone: "{{ lookup('env', 'zone') }}" + network: "{{ lookup('env', 'network') }}" +# unsupported in 1.9.+ + #service_account_permissions: "datastore,logging-write" tags: - created-by-{{ lookup('env', 'LOGNAME') |default(cluster, true) }} - env-{{ cluster }} - host-type-{{ type }} - - sub-host-type-{{ sub_host_type }} + - sub-host-type-{{ g_sub_host_type }} - env-host-type-{{ cluster }}-openshift-{{ type }} + when: instances |length > 0 register: gce - name: Add new instances to groups and set variables needed add_host: hostname: "{{ item.name }}" - ansible_ssh_host: "{{ item.public_ip }}" + ansible_ssh_host: "{{ item.name }}" ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}" ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" groups: "{{ item.tags | oo_prepend_strings_in_list('tag_') | join(',') }}" gce_public_ip: "{{ item.public_ip }}" gce_private_ip: "{{ item.private_ip }}" - with_items: gce.instance_data + with_items: gce.instance_data | default([], true) - name: Wait for ssh - wait_for: port=22 host={{ item.public_ip }} - with_items: gce.instance_data + wait_for: port=22 host={{ item.name }} + with_items: gce.instance_data | default([], true) - name: Wait for user setup command: "ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o ConnectTimeout=10 -o UserKnownHostsFile=/dev/null {{ hostvars[item.name].ansible_ssh_user }}@{{ item.public_ip }} echo {{ hostvars[item.name].ansible_ssh_user }} user is setup" register: result until: result.rc == 0 - retries: 20 - delay: 10 - with_items: gce.instance_data + retries: 30 + delay: 5 + with_items: gce.instance_data | default([], true) diff --git a/playbooks/gce/openshift-cluster/terminate.yml b/playbooks/gce/openshift-cluster/terminate.yml index 098b0df73..e20e0a8bc 100644 --- a/playbooks/gce/openshift-cluster/terminate.yml +++ b/playbooks/gce/openshift-cluster/terminate.yml @@ -1,25 +1,18 @@ --- - name: Terminate instance(s) hosts: localhost + connection: local gather_facts: no vars_files: - vars.yml tasks: - - set_fact: scratch_group=tag_env-host-type-{{ cluster_id }}-openshift-node + - set_fact: scratch_group=tag_env-{{ cluster_id }} - add_host: name: "{{ item }}" - groups: oo_hosts_to_terminate, oo_nodes_to_terminate + groups: oo_hosts_to_terminate ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}" ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" - with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) - - - set_fact: scratch_group=tag_env-host-type-{{ cluster_id }}-openshift-master - - add_host: - name: "{{ item }}" - groups: oo_hosts_to_terminate, oo_masters_to_terminate - ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}" - ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" - with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) + with_items: groups[scratch_group] | default([], true) | difference(['localhost']) | difference(groups.status_terminated | default([], true)) - name: Unsubscribe VMs hosts: oo_hosts_to_terminate @@ -32,14 +25,34 @@ lookup('oo_option', 'rhel_skip_subscription') | default(rhsub_skip, True) | default('no', True) | lower in ['no', 'false'] -- include: ../openshift-node/terminate.yml - vars: - gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" - gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" - gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +- name: Terminate instances(s) + hosts: localhost + connection: local + gather_facts: no + vars_files: + - vars.yml + tasks: + + - name: Terminate instances that were previously launched + local_action: + module: gce + state: 'absent' + name: "{{ item }}" + service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" + pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" + project_id: "{{ lookup('env', 'gce_project_id') }}" + zone: "{{ lookup('env', 'zone') }}" + with_items: groups['oo_hosts_to_terminate'] | default([], true) + when: item is defined -- include: ../openshift-master/terminate.yml - vars: - gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" - gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" - gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +#- include: ../openshift-node/terminate.yml +# vars: +# gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" +# gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" +# gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +# +#- include: ../openshift-master/terminate.yml +# vars: +# gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" +# gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" +# gce_project_id: "{{ lookup('env', 'gce_project_id') }}" diff --git a/playbooks/gce/openshift-cluster/vars.yml b/playbooks/gce/openshift-cluster/vars.yml index ae33083b9..6de007807 100644 --- a/playbooks/gce/openshift-cluster/vars.yml +++ b/playbooks/gce/openshift-cluster/vars.yml @@ -1,8 +1,11 @@ --- +do_we_use_openshift_sdn: true +sdn_network_plugin: redhat/openshift-ovs-subnet +# os_sdn_network_plugin_name can be ovssubnet or multitenant, see https://docs.openshift.org/latest/architecture/additional_concepts/sdn.html#ovssubnet-plugin-operation deployment_vars: origin: - image: centos-7 - ssh_user: + image: preinstalled-slave-50g-v5 + ssh_user: root sudo: yes online: image: libra-rhel7 @@ -12,4 +15,3 @@ deployment_vars: image: rhel-7 ssh_user: sudo: yes - diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 2ccc21292..6faa82dfc 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -53,6 +53,8 @@ def get_value_type(value_type): vtype = 0 if 'int' in value_type: vtype = 3 + elif 'log' in value_type: + vtype = 2 elif 'char' in value_type: vtype = 1 elif 'str' in value_type: diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 4ec1b8e02..e7fd6fa21 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -128,12 +128,12 @@ def get_status(status): return _status -def get_app_ids(zapi, application_names): +def get_app_ids(zapi, application_names, templateid): ''' get application ids from names ''' app_ids = [] for app_name in application_names: - content = zapi.get_content('application', 'get', {'search': {'name': app_name}}) + content = zapi.get_content('application', 'get', {'filter': {'name': app_name}, 'templateids': templateid}) if content.has_key('result'): app_ids.append(content['result'][0]['applicationid']) return app_ids @@ -212,7 +212,7 @@ def main(): 'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']), 'type': get_type(module.params['ztype']), 'value_type': get_value_type(module.params['value_type']), - 'applications': get_app_ids(zapi, module.params['applications']), + 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']), 'description': module.params['description'], } diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index 21d0fcfd2..ab7731faa 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -86,6 +86,24 @@ def get_trigger_status(inc_status): return r_status +def get_template_id(zapi, template_name): + ''' + get related templates + ''' + template_ids = [] + app_ids = {} + # Fetch templates by name + content = zapi.get_content('template', + 'get', + {'search': {'host': template_name}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + template_ids.append(content['result'][0]['templateid']) + for app in content['result'][0]['applications']: + app_ids[app['name']] = app['applicationid'] + + return template_ids, app_ids + def main(): ''' Create a trigger in zabbix @@ -117,6 +135,7 @@ def main(): url=dict(default=None, type='str'), status=dict(default=None, type='str'), state=dict(default='present', type='str'), + template_name=dict(default=None, type='str'), ), #supports_check_mode=True ) @@ -132,11 +151,16 @@ def main(): state = module.params['state'] tname = module.params['name'] + templateid = None + if module.params['template_name']: + templateid, _ = get_template_id(zapi, module.params['template_name']) + content = zapi.get_content(zbx_class_name, 'get', {'filter': {'description': tname}, 'expandExpression': True, 'selectDependencies': 'triggerid', + 'templateids': templateid, }) # Get diff --git a/roles/lib_zabbix/library/zbx_user_media.py b/roles/lib_zabbix/library/zbx_user_media.py index 9ed838f81..8895c78c3 100644 --- a/roles/lib_zabbix/library/zbx_user_media.py +++ b/roles/lib_zabbix/library/zbx_user_media.py @@ -260,6 +260,9 @@ def main(): for user in params['users']: diff['users']['userid'] = user['userid'] + # Medias have no real unique key so therefore we need to make it like the incoming user's request + diff['medias'] = medias + # We have differences and need to update content = zapi.get_content(zbx_class_name, 'updatemedia', diff) diff --git a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json index 0497e6824..5df36ccc2 100644 --- a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json +++ b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json @@ -6,10 +6,10 @@ "iconClass" : "icon-jboss", "description": "Application template for EAP 6 applications built using STI." }, - "name": "eap6-basic-sti" + "name": "eap6-https-sti" }, "labels": { - "template": "eap6-basic-sti" + "template": "eap6-https-sti" }, "parameters": [ { diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 987f7f7da..69bb49c9b 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -480,11 +480,11 @@ def set_deployment_facts_if_unset(facts): if role in facts: deployment_type = facts['common']['deployment_type'] if 'registry_url' not in facts[role]: - registry_url = 'aos3/aos-${component}:${version}' - if deployment_type in ['enterprise', 'online']: + registry_url = 'openshift/origin-${component}:${version}' + if deployment_type in ['enterprise', 'online', 'openshift-enterprise']: registry_url = 'openshift3/ose-${component}:${version}' - elif deployment_type == 'origin': - registry_url = 'openshift/origin-${component}:${version}' + elif deployment_type == 'atomic-enterprise': + registry_url = 'aep3/aep-${component}:${version}' facts[role]['registry_url'] = registry_url return facts diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index fd3d20800..6301d4fc0 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -1,5 +1,5 @@ --- -- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 +- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 and not 1.9.0.1 assert: that: - ansible_version | version_compare('1.8.0', 'ge') diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index 7c4f45ce6..637e494ea 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ b/roles/openshift_manage_node/tasks/main.yml @@ -1,21 +1,21 @@ - name: Wait for Node Registration command: > - {{ openshift.common.client_binary }} get node {{ item }} + {{ openshift.common.client_binary }} get node {{ item | lower }} register: omd_get_node until: omd_get_node.rc == 0 - retries: 10 + retries: 20 delay: 5 with_items: openshift_nodes - name: Set node schedulability command: > - {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} + {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname | lower }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} with_items: - "{{ openshift_node_vars }}" - name: Label nodes command: > - {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname }} {{ item.openshift.node.labels | oo_combine_dict }} + {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname | lower }} {{ item.openshift.node.labels | oo_combine_dict }} with_items: - "{{ openshift_node_vars }}" when: "'labels' in item.openshift.node and item.openshift.node.labels != {}" diff --git a/roles/openshift_master/templates/scheduler.json.j2 b/roles/openshift_master/templates/scheduler.json.j2 index 835f2383e..cb5f43bb2 100644 --- a/roles/openshift_master/templates/scheduler.json.j2 +++ b/roles/openshift_master/templates/scheduler.json.j2 @@ -1,4 +1,6 @@ { + "kind": "Policy", + "apiVersion": "v1", "predicates": [ {"name": "MatchNodeSelector"}, {"name": "PodFitsResources"}, diff --git a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 index 72889bc29..8a4f5a746 100644 --- a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 +++ b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 @@ -80,6 +80,7 @@ oauthConfig: provider: {{ identity_provider_config(identity_provider) }} {%- endfor %} + masterCA: ca.crt masterPublicURL: {{ openshift.master.public_api_url }} masterURL: {{ openshift.master.api_url }} sessionConfig: diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index e8cc499c0..d45dd8073 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -22,7 +22,7 @@ deployment_type: "{{ openshift_deployment_type }}" - role: node local_facts: - labels: "{{ openshift_node_labels | default(none) }}" + labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}" annotations: "{{ openshift_node_annotations | default(none) }}" registry_url: "{{ oreg_url | default(none) }}" debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}" diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2 index 07d80f99b..946c0b655 100644 --- a/roles/openshift_node/templates/node.yaml.v1.j2 +++ b/roles/openshift_node/templates/node.yaml.v1.j2 @@ -18,7 +18,7 @@ networkPluginName: {{ openshift.common.sdn_network_plugin_name }} networkConfig: mtu: {{ openshift.node.sdn_mtu }} networkPluginName: {{ openshift.common.sdn_network_plugin_name }} -nodeName: {{ openshift.common.hostname }} +nodeName: {{ openshift.common.hostname | lower }} podManifestConfig: servingInfo: bindAddress: 0.0.0.0:10250 diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index 28e900255..a503b24d7 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -13,6 +13,8 @@ - include_vars: template_openshift_master.yml - include_vars: template_openshift_node.yml - include_vars: template_ops_tools.yml +- include_vars: template_app_zabbix_server.yml +- include_vars: template_app_zabbix_agent.yml - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -61,3 +63,19 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Server + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_server }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Agent + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_app_zabbix_agent }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml new file mode 100644 index 000000000..06c4eda8b --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -0,0 +1,23 @@ +--- +g_template_app_zabbix_agent: + name: Template App Zabbix Agent + zitems: + - key: agent.hostname + applications: + - Zabbix agent + value_type: character + zabbix_type: '0' + + - key: agent.ping + applications: + - Zabbix agent + description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. + value_type: int + zabbix_type: '0' + + ztriggers: + - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' + description: Zabbix agent is unreachable for 15 minutes. + expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1' + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..dace2aa29 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,408 @@ +--- +g_template_app_zabbix_server: + name: Template App Zabbix Server + zitems: + - key: housekeeper_creates + applications: + - Zabbix server + description: A simple count of the number of partition creates output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_drops + applications: + - Zabbix server + description: A simple count of the number of partition drops output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_errors + applications: + - Zabbix server + description: A simple count of the number of errors output by the housekeeper script. + units: '' + value_type: int + zabbix_type: '2' + + - key: housekeeper_total + applications: + - Zabbix server + description: A simple count of the total number of lines output by the housekeeper + script. + units: '' + value_type: int + zabbix_type: '2' + + - key: zabbix[process,alerter,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,configuration syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,db watchdog,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,discoverer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,escalator,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,history syncer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,housekeeper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,http poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,icmp pinger,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,ipmi poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,java poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,node watcher,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,proxy poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,self-monitoring,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,snmp trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,timer,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,trapper,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[process,unreachable poller,avg,busy] + applications: + - Zabbix server + description: '' + units: '%' + value_type: float + zabbix_type: '5' + + - key: zabbix[queue,10m] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[queue] + applications: + - Zabbix server + description: '' + units: '' + value_type: int + zabbix_type: '5' + + - key: zabbix[rcache,buffer,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,history,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,text,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,trend,pfree] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + + - key: zabbix[wcache,values] + applications: + - Zabbix server + description: '' + units: '' + value_type: float + zabbix_type: '5' + ztriggers: + - description: "There has been unexpected output while running the housekeeping script\ + \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ + \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ + \ for more details." + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' + name: Unexpected output in Zabbix DB Housekeeping + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + + - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. + expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' + name: Errors during Zabbix DB Housekeeping + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' + name: Zabbix alerter processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' + name: Zabbix configuration syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' + name: Zabbix db watchdog processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' + name: Zabbix discoverer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' + name: Zabbix escalator processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' + name: Zabbix history syncer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' + name: Zabbix housekeeper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' + name: Zabbix http poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' + name: Zabbix icmp pinger processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' + name: Zabbix ipmi poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' + name: Zabbix java poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' + name: Zabbix node watcher processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' + name: Zabbix poller processes more than 75% busy + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' + name: Zabbix proxy poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' + name: Zabbix self-monitoring processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' + name: Zabbix snmp trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: Timer processes usually are busy because they have to process time + based trigger functions + expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' + name: Zabbix timer processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' + name: Zabbix trapper processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' + name: Zabbix unreachable poller processes more than 75% busy + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + + - description: "This alert generally indicates a performance problem or a problem\ + \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ + \ is Administration > Queue. Be sure to check the general view and the per-proxy\ + \ view." + expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' + name: More than 1000 items having missing data for more than 10 minutes + priority: high + url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + + - description: Consider increasing CacheSize in the zabbix_server.conf configuration + file + expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' + name: Less than 5% free in the configuration cache + priority: info + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' + name: Less than 25% free in the history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' + name: Less than 25% free in the text history cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + + - description: '' + expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' + name: Less than 25% free in the trends cache + priority: avg + url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index c71e07910..4ae918ec6 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,18 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.user.count + description: Shows number of users in a cluster + type: int + applications: + - Openshift Master + + - key: openshift.master.pod.running.count + description: Shows number of pods running + type: int + applications: + - Openshift Master + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -28,3 +40,9 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: high + + - name: 'Number of users for Openshift Master on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: info + diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index 36f9cc4a3..ce28b1048 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -8,13 +8,37 @@ g_template_openshift_node: applications: - Openshift Node + - key: openshift.node.ovs.pids.count + description: Shows number of ovs process ids running + type: int + applications: + - Openshift Node + + - key: openshift.node.ovs.ports.count + description: Shows number of OVS ports defined + type: int + applications: + - Openshift Node + ztriggers: - name: 'Openshift Node process not running on {HOST.NAME}' expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' priority: high - name: 'Too many Openshift Node processes running on {HOST.NAME}' expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + - name: 'OVS may not be running on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' priority: high + + - name: 'Number of OVS ports is 0 on {HOST.NAME}' + expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0' + url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' + priority: high + + diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 70c3809bd..69432273f 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -188,18 +188,6 @@ g_template_os_linux: multiplier: 1024 units: B - # Disk items - - key: filesys.full.xvda2 - applications: - - Disk - value_type: float - - - key: filesys.full.xvda3 - applications: - - Disk - value_type: float - - zdiscoveryrules: - name: disc.filesys key: disc.filesys @@ -215,38 +203,36 @@ g_template_os_linux: applications: - Disk - ztriggerprototypes: - - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: warn - - - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95' - url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' - priority: high + - discoveryrule_key: disc.filesys + name: "Percentage of used inodes on {#OSO_FILESYS}" + key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]" + value_type: float + description: "PCP derived value of percentage of used inodes on a filesystem." + applications: + - Disk - ztriggers: - - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' + ztriggerprototypes: + - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' + - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' - expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' + - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' + expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high + ztriggers: - name: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' |