diff options
-rw-r--r-- | Vagrantfile | 9 | ||||
-rw-r--r-- | playbooks/common/openshift-node/config.yml | 10 | ||||
-rw-r--r-- | roles/lib_zabbix/library/zbx_trigger.py | 18 | ||||
-rw-r--r-- | roles/lib_zabbix/tasks/create_template.yml | 1 | ||||
-rwxr-xr-x | roles/openshift_facts/library/openshift_facts.py | 23 | ||||
-rw-r--r-- | roles/openshift_manage_node/tasks/main.yml | 12 | ||||
-rw-r--r-- | roles/openshift_node/tasks/main.yml | 1 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_docker.yml | 6 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_heartbeat.yml | 1 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 1 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 11 |
11 files changed, 69 insertions, 24 deletions
diff --git a/Vagrantfile b/Vagrantfile index 4675b5d60..33532cd63 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -38,7 +38,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| end config.vm.provider "virtualbox" do |vbox, override| - override.vm.box = "chef/centos-7.1" + override.vm.box = "centos/7" vbox.memory = 1024 vbox.cpus = 2 @@ -54,8 +54,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| when "enterprise" override.vm.box = "rhel-7" when "origin" - override.vm.box = "centos-7.1" - override.vm.box_url = "https://download.gluster.org/pub/gluster/purpleidea/vagrant/centos-7.1/centos-7.1.box" + override.vm.box = "centos/7" override.vm.box_download_checksum = "b2a9f7421e04e73a5acad6fbaf4e9aba78b5aeabf4230eebacc9942e577c1e05" override.vm.box_download_checksum_type = "sha256" end @@ -66,7 +65,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.define "node#{node_index}" do |node| node.vm.hostname = "ose3-node#{node_index}.example.com" node.vm.network :private_network, ip: "192.168.100.#{200 + n}" - config.vm.provision "shell", inline: "nmcli connection reload; systemctl restart network.service" + config.vm.provision "shell", inline: "nmcli connection reload; systemctl restart NetworkManager.service" end end @@ -74,7 +73,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.hostname = "ose3-master.example.com" master.vm.network :private_network, ip: "192.168.100.100" master.vm.network :forwarded_port, guest: 8443, host: 8443 - config.vm.provision "shell", inline: "nmcli connection reload; systemctl restart network.service" + config.vm.provision "shell", inline: "nmcli connection reload; systemctl restart NetworkManager.service" master.vm.provision "ansible" do |ansible| ansible.limit = 'all' ansible.sudo = true diff --git a/playbooks/common/openshift-node/config.yml b/playbooks/common/openshift-node/config.yml index ba2f40d55..a14ca8e11 100644 --- a/playbooks/common/openshift-node/config.yml +++ b/playbooks/common/openshift-node/config.yml @@ -20,6 +20,7 @@ local_facts: labels: "{{ openshift_node_labels | default(None) }}" annotations: "{{ openshift_node_annotations | default(None) }}" + schedulable: "{{ openshift_schedulable | default(openshift_scheduleable) | default(None) }}" - name: Check status of node certificates stat: path: "{{ openshift.common.config_base }}/node/{{ item }}" @@ -124,21 +125,14 @@ - os_env_extras - os_env_extras_node -- name: Set scheduleability +- name: Set schedulability hosts: oo_first_master vars: openshift_nodes: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']) | oo_collect('openshift.common.hostname') }}" - openshift_unscheduleable_nodes: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config'] | default([])) - | oo_collect('openshift.common.hostname', {'openshift_scheduleable': False}) }}" openshift_node_vars: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']) }}" pre_tasks: - - set_fact: - openshift_scheduleable_nodes: "{{ hostvars - | oo_select_keys(groups['oo_nodes_to_config'] | default([])) - | oo_collect('openshift.common.hostname') - | difference(openshift_unscheduleable_nodes) }}" roles: - openshift_manage_node diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index c707a2f64..c384f6fa3 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -101,6 +101,7 @@ def main(): description=dict(default=None, type='str'), dependencies=dict(default=[], type='list'), priority=dict(default='avg', type='str'), + url=dict(default=None, type='str'), state=dict(default='present', type='str'), ), #supports_check_mode=True @@ -123,28 +124,41 @@ def main(): 'expandExpression': True, 'selectDependencies': 'triggerid', }) + + # Get if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + # Delete if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) module.exit_json(changed=True, results=content['result'], state="absent") + # Create and Update if state == 'present': params = {'description': description, 'expression': module.params['expression'], 'dependencies': get_deps(zapi, module.params['dependencies']), 'priority': get_priority(module.params['priority']), + 'url': module.params['url'], } + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# if not exists(content): # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties + + ######## + # UPDATE + ######## differences = {} zab_results = content['result'][0] for key, value in params.items(): diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index 630d5dd70..bc9aff997 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -45,5 +45,6 @@ dependencies: "{{ item.dependencies | default([], true) }}" expression: "{{ item.expression }}" priority: "{{ item.priority }}" + url: "{{ item.url | default(None, True) }}" with_items: template.ztriggers when: template.ztriggers is defined diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index d4f38a7b4..679c3273a 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -16,6 +16,7 @@ EXAMPLES = ''' import ConfigParser import copy import os +from distutils.util import strtobool def hostname_valid(hostname): @@ -301,6 +302,23 @@ def set_fluentd_facts_if_unset(facts): facts['common']['use_fluentd'] = use_fluentd return facts +def set_node_schedulability(facts): + """ Set schedulable facts if not already present in facts dict + Args: + facts (dict): existing facts + Returns: + dict: the facts dict updated with the generated schedulable + facts if they were not already present + + """ + if 'node' in facts: + if 'schedulable' not in facts['node']: + if 'master' in facts: + facts['node']['schedulable'] = False + else: + facts['node']['schedulable'] = True + return facts + def set_metrics_facts_if_unset(facts): """ Set cluster metrics facts if not already present in facts dict dict: the facts dict updated with the generated cluster metrics facts if @@ -477,8 +495,10 @@ def set_sdn_facts_if_unset(facts): were not already present """ if 'common' in facts: + use_sdn = facts['common']['use_openshift_sdn'] + if not (use_sdn == '' or isinstance(use_sdn, bool)): + facts['common']['use_openshift_sdn'] = bool(strtobool(str(use_sdn))) if 'sdn_network_plugin_name' not in facts['common']: - use_sdn = facts['common']['use_openshift_sdn'] plugin = 'redhat/openshift-ovs-subnet' if use_sdn else '' facts['common']['sdn_network_plugin_name'] = plugin @@ -741,6 +761,7 @@ class OpenShiftFacts(object): facts['current_config'] = get_current_config(facts) facts = set_url_facts_if_unset(facts) facts = set_fluentd_facts_if_unset(facts) + facts = set_node_schedulability(facts) facts = set_metrics_facts_if_unset(facts) facts = set_identity_providers_if_unset(facts) facts = set_sdn_facts_if_unset(facts) diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index 74e702248..7c4f45ce6 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ b/roles/openshift_manage_node/tasks/main.yml @@ -7,15 +7,11 @@ delay: 5 with_items: openshift_nodes -- name: Handle unscheduleable node +- name: Set node schedulability command: > - {{ openshift.common.admin_binary }} manage-node {{ item }} --schedulable=false - with_items: openshift_unscheduleable_nodes - -- name: Handle scheduleable node - command: > - {{ openshift.common.admin_binary }} manage-node {{ item }} --schedulable=true - with_items: openshift_scheduleable_nodes + {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} + with_items: + - "{{ openshift_node_vars }}" - name: Label nodes command: > diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 7a862c99f..1986b631e 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -29,6 +29,7 @@ portal_net: "{{ openshift_master_portal_net | default(None) }}" kubelet_args: "{{ openshift_node_kubelet_args | default(None) }}" sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}" + schedulable: "{{ openshift_schedulable | default(openshift_scheduleable) | default(None) }}" # We have to add tuned-profiles in the same transaction otherwise we run into depsolving # problems because the rpms don't pin the version properly. diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index 6a28f52b8..a1cd3519e 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -54,29 +54,35 @@ g_template_docker: ztriggers: - description: 'docker.ping failed on {HOST.NAME}' expression: '{Template Docker:docker.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high - description: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' priority: high - description: 'Critically low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.data.space.available.max(#3)}<5' # < 5% or < 5GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high - description: 'Critically low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<5 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.005' # < 5% or < 5MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' priority: high # Put triggers that depend on other triggers here (deps must be created first) - description: 'Low docker storage data space on {HOST.NAME}' expression: '{Template Docker:docker.storage.data.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.data.space.available.max(#3)}<10' # < 10% or < 10GB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage data space on {HOST.NAME}' priority: average - description: 'Low docker storage metadata space on {HOST.NAME}' expression: '{Template Docker:docker.storage.metadata.space.percent_available.max(#3)}<10 or {Template Docker:docker.storage.metadata.space.available.max(#3)}<0.01' # < 10% or < 10MB + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_storage.asciidoc' dependencies: - 'Critically low docker storage metadata space on {HOST.NAME}' priority: average diff --git a/roles/os_zabbix/vars/template_heartbeat.yml b/roles/os_zabbix/vars/template_heartbeat.yml index 3d0f3d51a..798377cd9 100644 --- a/roles/os_zabbix/vars/template_heartbeat.yml +++ b/roles/os_zabbix/vars/template_heartbeat.yml @@ -10,3 +10,4 @@ g_template_heartbeat: - description: 'Heartbeat.ping has failed on {HOST.NAME}' expression: '{Template Heartbeat:heartbeat.ping.nodata(20m)}=1' priority: avg + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_node_heartbeat.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index c4e4d1c94..d2c1365b0 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -9,4 +9,5 @@ g_template_openshift_master: ztriggers: - description: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 7bc8ddc5d..7c446cd85 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -102,6 +102,11 @@ g_template_os_linux: - Memory value_type: int + - key: mem.util.available + applications: + - Memory + value_type: int + - key: mem.util.used applications: - Memory @@ -160,24 +165,30 @@ g_template_os_linux: ztriggers: - description: 'Filesystem: / has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - description: 'Filesystem: / has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - description: 'Filesystem: /var has less than 10% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: warn - description: 'Filesystem: /var has less than 5% free on {HOST.NAME}' expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' priority: high - description: 'Too many TOTAL processes on {HOST.NAME}' expression: '{Template OS Linux:proc.nprocs.last()}>5000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' priority: warn - description: 'Lack of available memory on {HOST.NAME}' expression: '{Template OS Linux:mem.freemem.last()}<3000' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc' priority: warn |