diff options
-rwxr-xr-x | bin/ohi | 68 | ||||
-rw-r--r-- | bin/openshift_ansible/awsutil.py | 38 | ||||
-rwxr-xr-x | inventory/multi_inventory.py | 18 | ||||
-rw-r--r-- | roles/openshift_master/tasks/main.yml | 6 | ||||
-rw-r--r-- | roles/openshift_node/tasks/main.yml | 6 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_docker.yml | 10 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 8 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_zagg_server.yml | 16 | ||||
-rw-r--r-- | roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 | 2 |
9 files changed, 113 insertions, 59 deletions
@@ -1,14 +1,16 @@ #!/usr/bin/env python +''' +Ohi = Openshift Host Inventory + +This script provides an easy way to look at your host inventory. + +This depends on multi_inventory being setup correctly. +''' # vim: expandtab:tabstop=4:shiftwidth=4 import argparse -import traceback import sys import os -import re -import tempfile -import time -import subprocess import ConfigParser from openshift_ansible import awsutil @@ -20,6 +22,9 @@ CONFIG_HOST_TYPE_ALIAS_SECTION = 'host_type_aliases' class Ohi(object): + ''' + Class for managing openshift host inventory + ''' def __init__(self): self.host_type_aliases = {} self.file_path = os.path.join(os.path.dirname(os.path.realpath(__file__))) @@ -35,6 +40,10 @@ class Ohi(object): self.aws = awsutil.AwsUtil(self.host_type_aliases) def run(self): + ''' + Call into awsutil and retrieve the desired hosts and environments + ''' + if self.args.list_host_types: self.aws.print_host_types() return 0 @@ -43,18 +52,24 @@ class Ohi(object): if self.args.host_type is not None and \ self.args.env is not None: # Both env and host-type specified - hosts = self.aws.get_host_list(host_type=self.args.host_type, \ - envs=self.args.env) + hosts = self.aws.get_host_list(host_type=self.args.host_type, + envs=self.args.env, + version=self.args.openshift_version, + cached=self.args.cache_only) if self.args.host_type is None and \ self.args.env is not None: # Only env specified - hosts = self.aws.get_host_list(envs=self.args.env) + hosts = self.aws.get_host_list(envs=self.args.env, + version=self.args.openshift_version, + cached=self.args.cache_only) if self.args.host_type is not None and \ self.args.env is None: # Only host-type specified - hosts = self.aws.get_host_list(host_type=self.args.host_type) + hosts = self.aws.get_host_list(host_type=self.args.host_type, + version=self.args.openshift_version, + cached=self.args.cache_only) if hosts is None: # We weren't able to determine what they wanted to do @@ -69,6 +84,9 @@ class Ohi(object): return 0 def parse_config_file(self): + ''' + Parse the config file for ohi + ''' if os.path.isfile(self.config_path): config = ConfigParser.ConfigParser() config.read(self.config_path) @@ -85,23 +103,27 @@ class Ohi(object): parser = argparse.ArgumentParser(description='OpenShift Host Inventory') - parser.add_argument('--list-host-types', default=False, action='store_true', - help='List all of the host types') + parser.add_argument('--list-host-types', default=False, action='store_true', help='List all of the host types') - parser.add_argument('-e', '--env', action="store", - help="Which environment to use") + parser.add_argument('-e', '--env', action="store", help="Which environment to use") - parser.add_argument('-t', '--host-type', action="store", - help="Which host type to use") + parser.add_argument('-t', '--host-type', action="store", help="Which host type to use") - parser.add_argument('-l', '--user', action='store', default=None, - help='username') + parser.add_argument('-l', '--user', action='store', default=None, help='username') + parser.add_argument('-c', '--cache-only', action='store_true', default=False, + help='Retrieve the host inventory by cache only. Default is false.') - self.args = parser.parse_args() + parser.add_argument('-o', '--openshift-version', action='store', default='2', + help='Specify the openshift version. Default is 2') -if __name__ == '__main__': + self.args = parser.parse_args() + +def main(): + ''' + Ohi will do its work here + ''' if len(sys.argv) == 1: print "\nError: No options given. Use --help to see the available options\n" sys.exit(0) @@ -110,5 +132,9 @@ if __name__ == '__main__': ohi = Ohi() exitcode = ohi.run() sys.exit(exitcode) - except ArgumentError as e: - print "\nError: %s\n" % e.message + except ArgumentError as err: + print "\nError: %s\n" % err.message + +if __name__ == '__main__': + main() + diff --git a/bin/openshift_ansible/awsutil.py b/bin/openshift_ansible/awsutil.py index ba32b4dbd..1ea2f914c 100644 --- a/bin/openshift_ansible/awsutil.py +++ b/bin/openshift_ansible/awsutil.py @@ -46,14 +46,17 @@ class AwsUtil(object): self.alias_lookup[value] = key @staticmethod - def get_inventory(args=None): + def get_inventory(args=None, cached=False): """Calls the inventory script and returns a dictionary containing the inventory." Keyword arguments: args -- optional arguments to pass to the inventory script """ minv = multi_inventory.MultiInventory(args) - minv.run() + if cached: + minv.get_inventory_from_cache() + else: + minv.run() return minv.result def get_environments(self): @@ -168,11 +171,12 @@ class AwsUtil(object): host_type = self.resolve_host_type(host_type) return "tag_env-host-type_%s-%s" % (env, host_type) - def get_host_list(self, host_type=None, envs=None): + def get_host_list(self, host_type=None, envs=None, version=None, cached=False): """Get the list of hosts from the inventory using host-type and environment """ + retval = set([]) envs = envs or [] - inv = self.get_inventory() + inv = self.get_inventory(cached=cached) # We prefer to deal with a list of environments if issubclass(type(envs), basestring): @@ -183,29 +187,25 @@ class AwsUtil(object): if host_type and envs: # Both host type and environment were specified - retval = [] for env in envs: - env_host_type_tag = self.gen_env_host_type_tag(host_type, env) - if env_host_type_tag in inv.keys(): - retval += inv[env_host_type_tag] - return set(retval) + retval.update(inv.get('tag_environment_%s' % env, [])) + retval.intersection_update(inv.get(self.gen_host_type_tag(host_type), [])) - if envs and not host_type: + elif envs and not host_type: # Just environment was specified - retval = [] for env in envs: env_tag = AwsUtil.gen_env_tag(env) if env_tag in inv.keys(): - retval += inv[env_tag] - return set(retval) + retval.update(inv.get(env_tag, [])) - if host_type and not envs: + elif host_type and not envs: # Just host-type was specified - retval = [] host_type_tag = self.gen_host_type_tag(host_type) if host_type_tag in inv.keys(): - retval = inv[host_type_tag] - return set(retval) + retval.update(inv.get(host_type_tag, [])) + + # If version is specified then return only hosts in that version + if version: + retval.intersection_update(inv.get('oo_version_%s' % version, [])) - # We should never reach here! - raise ArgumentError("Invalid combination of parameters") + return retval diff --git a/inventory/multi_inventory.py b/inventory/multi_inventory.py index 232f2402d..20fc48aa9 100755 --- a/inventory/multi_inventory.py +++ b/inventory/multi_inventory.py @@ -56,15 +56,6 @@ class MultiInventory(object): else: self.config_file = None # expect env vars - - def run(self): - '''This method checks to see if the local - cache is valid for the inventory. - - if the cache is valid; return cache - else the credentials are loaded from multi_inventory.yaml or from the env - and we attempt to get the inventory from the provider specified. - ''' # load yaml if self.config_file and os.path.isfile(self.config_file): self.config = self.load_yaml_config() @@ -91,6 +82,15 @@ class MultiInventory(object): if self.config.has_key('cache_location'): self.cache_path = self.config['cache_location'] + def run(self): + '''This method checks to see if the local + cache is valid for the inventory. + + if the cache is valid; return cache + else the credentials are loaded from multi_inventory.yaml or from the env + and we attempt to get the inventory from the provider specified. + ''' + if self.args.get('refresh_cache', None): self.get_inventory() self.write_to_cache() diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 8995863ec..43647cc49 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -228,7 +228,7 @@ register: start_result - set_fact: - master_service_status_changed = start_result | changed + master_service_status_changed: start_result | changed when: not openshift_master_ha | bool - name: Start and enable master api @@ -237,7 +237,7 @@ register: start_result - set_fact: - master_api_service_status_changed = start_result | changed + master_api_service_status_changed: start_result | changed when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' - name: Start and enable master controller @@ -246,7 +246,7 @@ register: start_result - set_fact: - master_controllers_service_status_changed = start_result | changed + master_controllers_service_status_changed: start_result | changed when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' - name: Install cluster packages diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index eef7bec9a..38bffc2e5 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -85,11 +85,11 @@ docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | union(['registry.access.redhat.com']) | difference(['']) }}" - when: openshift.common.deployment_type == 'enterprise' + when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - set_fact: docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') | oo_split() | difference(['']) }}" - when: openshift.common.deployment_type != 'enterprise' + when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise'] - name: Add personal registries lineinfile: @@ -131,4 +131,4 @@ register: start_result - set_fact: - node_service_status_changed = start_result | changed + node_service_status_changed: start_result | changed diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index bfabf50c5..91a2c400e 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -12,6 +12,11 @@ g_template_docker: - Docker Daemon value_type: int + - key: docker.container.dns.resolution + applications: + - Docker Daemon + value_type: int + - key: docker.storage.is_loopback applications: - Docker Storage @@ -62,6 +67,11 @@ g_template_docker: url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc' priority: high + - name: 'docker.container.dns.resolution failed on {HOST.NAME}' + expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc' + priority: high + - name: 'Docker storage is using LOOPBACK on {HOST.NAME}' expression: '{Template Docker:docker.storage.is_loopback.last()}<>0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 514d6fd24..a0ba8d104 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -269,6 +269,14 @@ g_template_openshift_master: - 'Openshift Master process not running on {HOST.NAME}' priority: avg + - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}' + expression: '{Template Openshift Master:create_app.sum(1h)}>3' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' + dependencies: + - 'Openshift Master process not running on {HOST.NAME}' + description: The application create loop has failed 4 or more times in the last hour + priority: avg + - name: 'Openshift Master API health check is failing on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml index 0e8e53bb7..db5665993 100644 --- a/roles/os_zabbix/vars/template_zagg_server.yml +++ b/roles/os_zabbix/vars/template_zagg_server.yml @@ -7,7 +7,12 @@ g_template_zagg_server: - Zagg Server value_type: int - - key: zagg.server.processor.errors + - key: zagg.server.metrics.errors + applications: + - Zagg Server + value_type: int + + - key: zagg.server.heartbeat.errors applications: - Zagg Server value_type: int @@ -18,8 +23,13 @@ g_template_zagg_server: value_type: int ztriggers: - - name: 'Error sending metrics on {HOST.NAME}' - expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0' + - name: 'Error processing metrics on {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' + priority: average + + - name: 'Error processing heartbeats on {HOST.NAME}' + expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' priority: average diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 index 978e40b88..bcc8a5e03 100644 --- a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 +++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 @@ -42,7 +42,7 @@ ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }} -v /etc/localtime:/etc/localtime \ -v /run/pcp:/run/pcp \ -v /var/run/docker.sock:/var/run/docker.sock \ - -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \ + -v /var/run/openvswitch:/var/run/openvswitch \ {% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %} -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \ -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \ |