diff options
43 files changed, 1694 insertions, 189 deletions
diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index c2f5784ce..51ee78d34 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.0.12-1 ./ +3.0.14-1 ./ diff --git a/.tito/packages/openshift-ansible-bin b/.tito/packages/openshift-ansible-bin deleted file mode 100644 index 5275dfcf9..000000000 --- a/.tito/packages/openshift-ansible-bin +++ /dev/null @@ -1 +0,0 @@ -0.0.21-1 bin/ diff --git a/.tito/packages/openshift-ansible-inventory b/.tito/packages/openshift-ansible-inventory deleted file mode 100644 index 85502438a..000000000 --- a/.tito/packages/openshift-ansible-inventory +++ /dev/null @@ -1 +0,0 @@ -0.0.11-1 inventory/ diff --git a/.tito/releasers.conf b/.tito/releasers.conf index f863ce9b1..a9116291a 100644 --- a/.tito/releasers.conf +++ b/.tito/releasers.conf @@ -11,3 +11,7 @@ srpm_disttag = .el7ose releaser = tito.release.DistGitReleaser branches = rhaos-3.1-rhel-7 srpm_disttag = .el7aos + +[copr-openshift-ansible] +releaser = tito.release.CoprReleaser +project_name = openshift-ansible diff --git a/README_AWS.md b/README_AWS.md index d9e2ac5a9..16ccb07e8 100644 --- a/README_AWS.md +++ b/README_AWS.md @@ -81,9 +81,20 @@ Node specific defaults: - Docker volume type: gp2 (only applicable if ephemeral is false) - Docker volume iops: 500 (only applicable when volume type is io1) +Specifying ec2 instance type. +All instances: +- export ec2_instance_type='m4.large' +Master instances: +- export ec2_master_instance_type='m4.large' +Infra node instances: +- export ec2_infra_instance_type='m4.large' +Non-infra node instances: +- export ec2_node_instance_type='m4.large' +etcd instances: +- export ec2_etcd_instance_type='m4.large' + If needed, these values can be changed by setting environment variables on your system. -- export ec2_instance_type='m4.large' - export ec2_image='ami-307b3658' - export ec2_region='us-east-1' - export ec2_keypair='libra' diff --git a/README_GCE.md b/README_GCE.md index 50f8ade70..ea673b44d 100644 --- a/README_GCE.md +++ b/README_GCE.md @@ -43,7 +43,11 @@ Mandatory customization variables (check the values according to your tenant): * zone = europe-west1-d * network = default * gce_machine_type = n1-standard-2 +* gce_machine_master_type = n1-standard-1 +* gce_machine_node_type = n1-standard-2 * gce_machine_image = preinstalled-slave-50g-v5 +* gce_machine_master_image = preinstalled-slave-50g-v5 +* gce_machine_node_image = preinstalled-slave-50g-v5 1. vi ~/.gce/gce.ini @@ -56,7 +60,11 @@ gce_project_id = project_id zone = europe-west1-d network = default gce_machine_type = n1-standard-2 +gce_machine_master_type = n1-standard-1 +gce_machine_node_type = n1-standard-2 gce_machine_image = preinstalled-slave-50g-v5 +gce_machine_master_image = preinstalled-slave-50g-v5 +gce_machine_node_image = preinstalled-slave-50g-v5 ``` 1. Define the environment variable GCE_INI_PATH so gce.py can pick it up and bin/cluster can also read it diff --git a/bin/cluster b/bin/cluster index 220f11d49..a3d4b629c 100755 --- a/bin/cluster +++ b/bin/cluster @@ -163,7 +163,7 @@ class Cluster(object): boto_configs = [conf for conf in boto_conf_files if conf_exists(conf)] if len(key_missing) > 0 and len(boto_configs) == 0: - raise ValueError("PROVIDER aws requires {} environment variable(s). See README_AWS.md".format(key_missing)) + raise ValueError("PROVIDER aws requires {0} environment variable(s). See README_AWS.md".format(key_missing)) elif 'libvirt' == provider: inventory = '-i inventory/libvirt/hosts' @@ -171,7 +171,7 @@ class Cluster(object): inventory = '-i inventory/openstack/hosts' else: # this code should never be reached - raise ValueError("invalid PROVIDER {}".format(provider)) + raise ValueError("invalid PROVIDER {0}".format(provider)) return inventory @@ -186,18 +186,18 @@ class Cluster(object): verbose = '' if args.verbose > 0: - verbose = '-{}'.format('v' * args.verbose) + verbose = '-{0}'.format('v' * args.verbose) if args.option: for opt in args.option: k, v = opt.split('=', 1) env['cli_' + k] = v - ansible_env = '-e \'{}\''.format( + ansible_env = '-e \'{0}\''.format( ' '.join(['%s=%s' % (key, value) for (key, value) in env.items()]) ) - command = 'ansible-playbook {} {} {} {}'.format( + command = 'ansible-playbook {0} {1} {2} {3}'.format( verbose, inventory, ansible_env, playbook ) @@ -205,16 +205,16 @@ class Cluster(object): command = 'ANSIBLE_CALLBACK_PLUGINS=ansible-profile/callback_plugins ' + command if args.verbose > 1: - command = 'time {}'.format(command) + command = 'time {0}'.format(command) if args.verbose > 0: - sys.stderr.write('RUN [{}]\n'.format(command)) + sys.stderr.write('RUN [{0}]\n'.format(command)) sys.stderr.flush() try: subprocess.check_call(command, shell=True) except subprocess.CalledProcessError as exc: - raise ActionFailed("ACTION [{}] failed: {}" + raise ActionFailed("ACTION [{0}] failed: {1}" .format(args.action, exc)) @@ -325,14 +325,14 @@ if __name__ == '__main__': args = parser.parse_args() if 'terminate' == args.action and not args.force: - answer = raw_input("This will destroy the ENTIRE {} environment. Are you sure? [y/N] ".format(args.cluster_id)) + answer = raw_input("This will destroy the ENTIRE {0} environment. Are you sure? [y/N] ".format(args.cluster_id)) if answer not in ['y', 'Y']: sys.stderr.write('\nACTION [terminate] aborted by user!\n') exit(1) if 'update' == args.action and not args.force: answer = raw_input( - "This is destructive and could corrupt {} environment. Continue? [y/N] ".format(args.cluster_id)) + "This is destructive and could corrupt {0} environment. Continue? [y/N] ".format(args.cluster_id)) if answer not in ['y', 'Y']: sys.stderr.write('\nACTION [update] aborted by user!\n') exit(1) diff --git a/filter_plugins/oo_filters.py b/filter_plugins/oo_filters.py index 2386b5878..1a854f637 100644 --- a/filter_plugins/oo_filters.py +++ b/filter_plugins/oo_filters.py @@ -191,7 +191,11 @@ class FilterModule(object): { 'root': { 'volume_size': 10, 'device_type': 'gp2', 'iops': 500 - } + }, + 'docker': + { 'volume_size': 40, 'device_type': 'gp2', + 'iops': 500, 'ephemeral': 'true' + } }, 'node': { 'root': @@ -216,7 +220,7 @@ class FilterModule(object): root_vol['delete_on_termination'] = True if root_vol['device_type'] != 'io1': root_vol.pop('iops', None) - if host_type == 'node': + if host_type in ['master', 'node'] and 'docker' in data[host_type]: docker_vol = data[host_type]['docker'] docker_vol['device_name'] = '/dev/xvdb' docker_vol['delete_on_termination'] = True @@ -227,7 +231,7 @@ class FilterModule(object): docker_vol.pop('delete_on_termination', None) docker_vol['ephemeral'] = 'ephemeral0' return [root_vol, docker_vol] - elif host_type == 'etcd': + elif host_type == 'etcd' and 'etcd' in data[host_type]: etcd_vol = data[host_type]['etcd'] etcd_vol['device_name'] = '/dev/xvdb' etcd_vol['delete_on_termination'] = True @@ -346,27 +350,27 @@ class FilterModule(object): @staticmethod # pylint: disable=too-many-branches - def oo_parse_certificate_names(certificates, data_dir, internal_hostnames): + def oo_parse_named_certificates(certificates, named_certs_dir, internal_hostnames): ''' Parses names from list of certificate hashes. - Ex: certificates = [{ "certfile": "/etc/origin/master/custom1.crt", - "keyfile": "/etc/origin/master/custom1.key" }, + Ex: certificates = [{ "certfile": "/root/custom1.crt", + "keyfile": "/root/custom1.key" }, { "certfile": "custom2.crt", "keyfile": "custom2.key" }] - returns [{ "certfile": "/etc/origin/master/custom1.crt", - "keyfile": "/etc/origin/master/custom1.key", + returns [{ "certfile": "/etc/origin/master/named_certificates/custom1.crt", + "keyfile": "/etc/origin/master/named_certificates/custom1.key", "names": [ "public-master-host.com", "other-master-host.com" ] }, - { "certfile": "/etc/origin/master/custom2.crt", - "keyfile": "/etc/origin/master/custom2.key", + { "certfile": "/etc/origin/master/named_certificates/custom2.crt", + "keyfile": "/etc/origin/master/named_certificates/custom2.key", "names": [ "some-hostname.com" ] }] ''' if not issubclass(type(certificates), list): raise errors.AnsibleFilterError("|failed expects certificates is a list") - if not issubclass(type(data_dir), unicode): - raise errors.AnsibleFilterError("|failed expects data_dir is unicode") + if not issubclass(type(named_certs_dir), unicode): + raise errors.AnsibleFilterError("|failed expects named_certs_dir is unicode") if not issubclass(type(internal_hostnames), list): raise errors.AnsibleFilterError("|failed expects internal_hostnames is list") @@ -399,6 +403,11 @@ class FilterModule(object): raise errors.AnsibleFilterError(("|failed to parse certificate '%s' or " % certificate['certfile'] + "detected a collision with internal hostname, please specify " + "certificate names in host inventory")) + + for certificate in certificates: + # Update paths for configuration + certificate['certfile'] = os.path.join(named_certs_dir, os.path.basename(certificate['certfile'])) + certificate['keyfile'] = os.path.join(named_certs_dir, os.path.basename(certificate['keyfile'])) return certificates @staticmethod @@ -474,7 +483,7 @@ class FilterModule(object): "oo_split": self.oo_split, "oo_filter_list": self.oo_filter_list, "oo_parse_heat_stack_outputs": self.oo_parse_heat_stack_outputs, - "oo_parse_certificate_names": self.oo_parse_certificate_names, + "oo_parse_named_certificates": self.oo_parse_named_certificates, "oo_haproxy_backend_masters": self.oo_haproxy_backend_masters, "oo_pretty_print_cluster": self.oo_pretty_print_cluster } diff --git a/inventory/aws/hosts/ec2.ini b/inventory/aws/hosts/ec2.ini index eaab0a410..1f503b8cf 100644 --- a/inventory/aws/hosts/ec2.ini +++ b/inventory/aws/hosts/ec2.ini @@ -24,24 +24,61 @@ regions_exclude = us-gov-west-1,cn-north-1 # This is the normal destination variable to use. If you are running Ansible # from outside EC2, then 'public_dns_name' makes the most sense. If you are # running Ansible from within EC2, then perhaps you want to use the internal -# address, and should set this to 'private_dns_name'. +# address, and should set this to 'private_dns_name'. The key of an EC2 tag +# may optionally be used; however the boto instance variables hold precedence +# in the event of a collision. destination_variable = public_dns_name # For server inside a VPC, using DNS names may not make sense. When an instance # has 'subnet_id' set, this variable is used. If the subnet is public, setting # this to 'ip_address' will return the public IP address. For instances in a # private subnet, this should be set to 'private_ip_address', and Ansible must -# be run from with EC2. +# be run from within EC2. The key of an EC2 tag may optionally be used; however +# the boto instance variables hold precedence in the event of a collision. +# WARNING: - instances that are in the private vpc, _without_ public ip address +# will not be listed in the inventory until You set: +# vpc_destination_variable = 'private_ip_address' vpc_destination_variable = ip_address # To tag instances on EC2 with the resource records that point to them from # Route53, uncomment and set 'route53' to True. route53 = False +# To exclude RDS instances from the inventory, uncomment and set to False. +#rds = False + +# To exclude ElastiCache instances from the inventory, uncomment and set to False. +#elasticache = False + # Additionally, you can specify the list of zones to exclude looking up in # 'route53_excluded_zones' as a comma-separated list. # route53_excluded_zones = samplezone1.com, samplezone2.com +# By default, only EC2 instances in the 'running' state are returned. Set +# 'all_instances' to True to return all instances regardless of state. +all_instances = False + +# By default, only EC2 instances in the 'running' state are returned. Specify +# EC2 instance states to return as a comma-separated list. This +# option is overriden when 'all_instances' is True. +# instance_states = pending, running, shutting-down, terminated, stopping, stopped + +# By default, only RDS instances in the 'available' state are returned. Set +# 'all_rds_instances' to True return all RDS instances regardless of state. +all_rds_instances = False + +# By default, only ElastiCache clusters and nodes in the 'available' state +# are returned. Set 'all_elasticache_clusters' and/or 'all_elastic_nodes' +# to True return all ElastiCache clusters and nodes, regardless of state. +# +# Note that all_elasticache_nodes only applies to listed clusters. That means +# if you set all_elastic_clusters to false, no node will be return from +# unavailable clusters, regardless of the state and to what you set for +# all_elasticache_nodes. +all_elasticache_replication_groups = False +all_elasticache_clusters = False +all_elasticache_nodes = False + # API calls to EC2 are slow. For this reason, we cache the results of an API # call. Set this to the path you want cache files to be written to. Two files # will be written to this directory: @@ -60,3 +97,59 @@ cache_max_age = 300 # destination_variable and vpc_destination_variable. # destination_format = {0}.{1}.rhcloud.com # destination_format_tags = Name,environment + +# Organize groups into a nested/hierarchy instead of a flat namespace. +nested_groups = False + +# Replace - tags when creating groups to avoid issues with ansible +replace_dash_in_groups = False + +# The EC2 inventory output can become very large. To manage its size, +# configure which groups should be created. +group_by_instance_id = True +group_by_region = True +group_by_availability_zone = True +group_by_ami_id = True +group_by_instance_type = True +group_by_key_pair = True +group_by_vpc_id = True +group_by_security_group = True +group_by_tag_keys = True +group_by_tag_none = True +group_by_route53_names = True +group_by_rds_engine = True +group_by_rds_parameter_group = True +group_by_elasticache_engine = True +group_by_elasticache_cluster = True +group_by_elasticache_parameter_group = True +group_by_elasticache_replication_group = True + +# If you only want to include hosts that match a certain regular expression +# pattern_include = staging-* + +# If you want to exclude any hosts that match a certain regular expression +# pattern_exclude = staging-* + +# Instance filters can be used to control which instances are retrieved for +# inventory. For the full list of possible filters, please read the EC2 API +# docs: http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ApiReference-query-DescribeInstances.html#query-DescribeInstances-filters +# Filters are key/value pairs separated by '=', to list multiple filters use +# a list separated by commas. See examples below. + +# Retrieve only instances with (key=value) env=staging tag +# instance_filters = tag:env=staging + +# Retrieve only instances with role=webservers OR role=dbservers tag +# instance_filters = tag:role=webservers,tag:role=dbservers + +# Retrieve only t1.micro instances OR instances with tag env=staging +# instance_filters = instance-type=t1.micro,tag:env=staging + +# You can use wildcards in filter values also. Below will list instances which +# tag Name value matches webservers1* +# (ex. webservers15, webservers1a, webservers123 etc) +# instance_filters = tag:Name=webservers1* + +# A boto configuration profile may be used to separate out credentials +# see http://boto.readthedocs.org/en/latest/boto_config_tut.html +# boto_profile = some-boto-profile-name diff --git a/inventory/aws/hosts/ec2.py b/inventory/aws/hosts/ec2.py index f231ff4c2..8b878cafd 100755 --- a/inventory/aws/hosts/ec2.py +++ b/inventory/aws/hosts/ec2.py @@ -22,6 +22,12 @@ you need to define: export EC2_URL=http://hostname_of_your_cc:port/services/Eucalyptus +If you're using boto profiles (requires boto>=2.24.0) you can choose a profile +using the --boto-profile command line argument (e.g. ec2.py --boto-profile prod) or using +the AWS_PROFILE variable: + + AWS_PROFILE=prod ansible-playbook -i ec2.py myplaybook.yml + For more details, see: http://docs.pythonboto.org/en/latest/boto_config_tut.html When run against a specific host, this script returns the following variables: @@ -121,8 +127,11 @@ from time import time import boto from boto import ec2 from boto import rds +from boto import elasticache from boto import route53 -import ConfigParser +import six + +from six.moves import configparser from collections import defaultdict try: @@ -145,9 +154,18 @@ class Ec2Inventory(object): # Index of hostname (address) to instance ID self.index = {} + # Boto profile to use (if any) + self.boto_profile = None + # Read settings and parse CLI arguments - self.read_settings() self.parse_cli_args() + self.read_settings() + + # Make sure that profile_name is not passed at all if not set + # as pre 2.24 boto will fall over otherwise + if self.boto_profile: + if not hasattr(boto.ec2.EC2Connection, 'profile_name'): + self.fail_with_error("boto version must be >= 2.24 to use profile") # Cache if self.args.refresh_cache: @@ -166,7 +184,7 @@ class Ec2Inventory(object): else: data_to_print = self.json_format_dict(self.inventory, True) - print data_to_print + print(data_to_print) def is_cache_valid(self): @@ -184,10 +202,12 @@ class Ec2Inventory(object): def read_settings(self): ''' Reads the settings from the ec2.ini file ''' - - config = ConfigParser.SafeConfigParser() + if six.PY3: + config = configparser.ConfigParser() + else: + config = configparser.SafeConfigParser() ec2_default_ini_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'ec2.ini') - ec2_ini_path = os.environ.get('EC2_INI_PATH', ec2_default_ini_path) + ec2_ini_path = os.path.expanduser(os.path.expandvars(os.environ.get('EC2_INI_PATH', ec2_default_ini_path))) config.read(ec2_ini_path) # is eucalyptus? @@ -236,18 +256,72 @@ class Ec2Inventory(object): if config.has_option('ec2', 'rds'): self.rds_enabled = config.getboolean('ec2', 'rds') - # Return all EC2 and RDS instances (if RDS is enabled) + # Include ElastiCache instances? + self.elasticache_enabled = True + if config.has_option('ec2', 'elasticache'): + self.elasticache_enabled = config.getboolean('ec2', 'elasticache') + + # Return all EC2 instances? if config.has_option('ec2', 'all_instances'): self.all_instances = config.getboolean('ec2', 'all_instances') else: self.all_instances = False + + # Instance states to be gathered in inventory. Default is 'running'. + # Setting 'all_instances' to 'yes' overrides this option. + ec2_valid_instance_states = [ + 'pending', + 'running', + 'shutting-down', + 'terminated', + 'stopping', + 'stopped' + ] + self.ec2_instance_states = [] + if self.all_instances: + self.ec2_instance_states = ec2_valid_instance_states + elif config.has_option('ec2', 'instance_states'): + for instance_state in config.get('ec2', 'instance_states').split(','): + instance_state = instance_state.strip() + if instance_state not in ec2_valid_instance_states: + continue + self.ec2_instance_states.append(instance_state) + else: + self.ec2_instance_states = ['running'] + + # Return all RDS instances? (if RDS is enabled) if config.has_option('ec2', 'all_rds_instances') and self.rds_enabled: self.all_rds_instances = config.getboolean('ec2', 'all_rds_instances') else: self.all_rds_instances = False + # Return all ElastiCache replication groups? (if ElastiCache is enabled) + if config.has_option('ec2', 'all_elasticache_replication_groups') and self.elasticache_enabled: + self.all_elasticache_replication_groups = config.getboolean('ec2', 'all_elasticache_replication_groups') + else: + self.all_elasticache_replication_groups = False + + # Return all ElastiCache clusters? (if ElastiCache is enabled) + if config.has_option('ec2', 'all_elasticache_clusters') and self.elasticache_enabled: + self.all_elasticache_clusters = config.getboolean('ec2', 'all_elasticache_clusters') + else: + self.all_elasticache_clusters = False + + # Return all ElastiCache nodes? (if ElastiCache is enabled) + if config.has_option('ec2', 'all_elasticache_nodes') and self.elasticache_enabled: + self.all_elasticache_nodes = config.getboolean('ec2', 'all_elasticache_nodes') + else: + self.all_elasticache_nodes = False + + # boto configuration profile (prefer CLI argument) + self.boto_profile = self.args.boto_profile + if config.has_option('ec2', 'boto_profile') and not self.boto_profile: + self.boto_profile = config.get('ec2', 'boto_profile') + # Cache related cache_dir = os.path.expanduser(config.get('ec2', 'cache_path')) + if self.boto_profile: + cache_dir = os.path.join(cache_dir, 'profile_' + self.boto_profile) if not os.path.exists(cache_dir): os.makedirs(cache_dir) @@ -261,6 +335,12 @@ class Ec2Inventory(object): else: self.nested_groups = False + # Replace dash or not in group names + if config.has_option('ec2', 'replace_dash_in_groups'): + self.replace_dash_in_groups = config.getboolean('ec2', 'replace_dash_in_groups') + else: + self.replace_dash_in_groups = True + # Configure which groups should be created. group_by_options = [ 'group_by_instance_id', @@ -276,6 +356,10 @@ class Ec2Inventory(object): 'group_by_route53_names', 'group_by_rds_engine', 'group_by_rds_parameter_group', + 'group_by_elasticache_engine', + 'group_by_elasticache_cluster', + 'group_by_elasticache_parameter_group', + 'group_by_elasticache_replication_group', ] for option in group_by_options: if config.has_option('ec2', option): @@ -290,7 +374,7 @@ class Ec2Inventory(object): self.pattern_include = re.compile(pattern_include) else: self.pattern_include = None - except ConfigParser.NoOptionError, e: + except configparser.NoOptionError: self.pattern_include = None # Do we need to exclude hosts that match a pattern? @@ -300,7 +384,7 @@ class Ec2Inventory(object): self.pattern_exclude = re.compile(pattern_exclude) else: self.pattern_exclude = None - except ConfigParser.NoOptionError, e: + except configparser.NoOptionError: self.pattern_exclude = None # Instance filters (see boto and EC2 API docs). Ignore invalid filters. @@ -325,6 +409,8 @@ class Ec2Inventory(object): help='Get all the variables about a specific instance') parser.add_argument('--refresh-cache', action='store_true', default=False, help='Force refresh of cache by making API requests to EC2 (default: False - use cache files)') + parser.add_argument('--boto-profile', action='store', + help='Use boto profile for connections to EC2') self.args = parser.parse_args() @@ -338,30 +424,52 @@ class Ec2Inventory(object): self.get_instances_by_region(region) if self.rds_enabled: self.get_rds_instances_by_region(region) + if self.elasticache_enabled: + self.get_elasticache_clusters_by_region(region) + self.get_elasticache_replication_groups_by_region(region) self.write_to_cache(self.inventory, self.cache_path_cache) self.write_to_cache(self.index, self.cache_path_index) + def connect(self, region): + ''' create connection to api server''' + if self.eucalyptus: + conn = boto.connect_euca(host=self.eucalyptus_host) + conn.APIVersion = '2010-08-31' + else: + conn = self.connect_to_aws(ec2, region) + return conn + + def boto_fix_security_token_in_profile(self, connect_args): + ''' monkey patch for boto issue boto/boto#2100 ''' + profile = 'profile ' + self.boto_profile + if boto.config.has_option(profile, 'aws_security_token'): + connect_args['security_token'] = boto.config.get(profile, 'aws_security_token') + return connect_args + + def connect_to_aws(self, module, region): + connect_args = {} + + # only pass the profile name if it's set (as it is not supported by older boto versions) + if self.boto_profile: + connect_args['profile_name'] = self.boto_profile + self.boto_fix_security_token_in_profile(connect_args) + + conn = module.connect_to_region(region, **connect_args) + # connect_to_region will fail "silently" by returning None if the region name is wrong or not supported + if conn is None: + self.fail_with_error("region name: %s likely not supported, or AWS is down. connection to region failed." % region) + return conn def get_instances_by_region(self, region): ''' Makes an AWS EC2 API call to the list of instances in a particular region ''' try: - if self.eucalyptus: - conn = boto.connect_euca(host=self.eucalyptus_host) - conn.APIVersion = '2010-08-31' - else: - conn = ec2.connect_to_region(region) - - # connect_to_region will fail "silently" by returning None if the region name is wrong or not supported - if conn is None: - print("region name: %s likely not supported, or AWS is down. connection to region failed." % region) - sys.exit(1) - + conn = self.connect(region) reservations = [] if self.ec2_instance_filters: - for filter_key, filter_values in self.ec2_instance_filters.iteritems(): + for filter_key, filter_values in self.ec2_instance_filters.items(): reservations.extend(conn.get_all_instances(filters = { filter_key : filter_values })) else: reservations = conn.get_all_instances() @@ -370,40 +478,130 @@ class Ec2Inventory(object): for instance in reservation.instances: self.add_instance(instance, region) - except boto.exception.BotoServerError, e: - if not self.eucalyptus: - print "Looks like AWS is down again:" - print e - sys.exit(1) + except boto.exception.BotoServerError as e: + if e.error_code == 'AuthFailure': + error = self.get_auth_error_message() + else: + backend = 'Eucalyptus' if self.eucalyptus else 'AWS' + error = "Error connecting to %s backend.\n%s" % (backend, e.message) + self.fail_with_error(error, 'getting EC2 instances') def get_rds_instances_by_region(self, region): ''' Makes an AWS API call to the list of RDS instances in a particular region ''' try: - conn = rds.connect_to_region(region) + conn = self.connect_to_aws(rds, region) if conn: instances = conn.get_all_dbinstances() for instance in instances: self.add_rds_instance(instance, region) - except boto.exception.BotoServerError, e: + except boto.exception.BotoServerError as e: + error = e.reason + + if e.error_code == 'AuthFailure': + error = self.get_auth_error_message() if not e.reason == "Forbidden": - print "Looks like AWS RDS is down: " - print e - sys.exit(1) + error = "Looks like AWS RDS is down:\n%s" % e.message + self.fail_with_error(error, 'getting RDS instances') - def get_instance(self, region, instance_id): - ''' Gets details about a specific instance ''' - if self.eucalyptus: - conn = boto.connect_euca(self.eucalyptus_host) - conn.APIVersion = '2010-08-31' + def get_elasticache_clusters_by_region(self, region): + ''' Makes an AWS API call to the list of ElastiCache clusters (with + nodes' info) in a particular region.''' + + # ElastiCache boto module doesn't provide a get_all_intances method, + # that's why we need to call describe directly (it would be called by + # the shorthand method anyway...) + try: + conn = elasticache.connect_to_region(region) + if conn: + # show_cache_node_info = True + # because we also want nodes' information + response = conn.describe_cache_clusters(None, None, None, True) + + except boto.exception.BotoServerError as e: + error = e.reason + + if e.error_code == 'AuthFailure': + error = self.get_auth_error_message() + if not e.reason == "Forbidden": + error = "Looks like AWS ElastiCache is down:\n%s" % e.message + self.fail_with_error(error, 'getting ElastiCache clusters') + + try: + # Boto also doesn't provide wrapper classes to CacheClusters or + # CacheNodes. Because of that wo can't make use of the get_list + # method in the AWSQueryConnection. Let's do the work manually + clusters = response['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters'] + + except KeyError as e: + error = "ElastiCache query to AWS failed (unexpected format)." + self.fail_with_error(error, 'getting ElastiCache clusters') + + for cluster in clusters: + self.add_elasticache_cluster(cluster, region) + + def get_elasticache_replication_groups_by_region(self, region): + ''' Makes an AWS API call to the list of ElastiCache replication groups + in a particular region.''' + + # ElastiCache boto module doesn't provide a get_all_intances method, + # that's why we need to call describe directly (it would be called by + # the shorthand method anyway...) + try: + conn = elasticache.connect_to_region(region) + if conn: + response = conn.describe_replication_groups() + + except boto.exception.BotoServerError as e: + error = e.reason + + if e.error_code == 'AuthFailure': + error = self.get_auth_error_message() + if not e.reason == "Forbidden": + error = "Looks like AWS ElastiCache [Replication Groups] is down:\n%s" % e.message + self.fail_with_error(error, 'getting ElastiCache clusters') + + try: + # Boto also doesn't provide wrapper classes to ReplicationGroups + # Because of that wo can't make use of the get_list method in the + # AWSQueryConnection. Let's do the work manually + replication_groups = response['DescribeReplicationGroupsResponse']['DescribeReplicationGroupsResult']['ReplicationGroups'] + + except KeyError as e: + error = "ElastiCache [Replication Groups] query to AWS failed (unexpected format)." + self.fail_with_error(error, 'getting ElastiCache clusters') + + for replication_group in replication_groups: + self.add_elasticache_replication_group(replication_group, region) + + def get_auth_error_message(self): + ''' create an informative error message if there is an issue authenticating''' + errors = ["Authentication error retrieving ec2 inventory."] + if None in [os.environ.get('AWS_ACCESS_KEY_ID'), os.environ.get('AWS_SECRET_ACCESS_KEY')]: + errors.append(' - No AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment vars found') else: - conn = ec2.connect_to_region(region) + errors.append(' - AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment vars found but may not be correct') - # connect_to_region will fail "silently" by returning None if the region name is wrong or not supported - if conn is None: - print("region name: %s likely not supported, or AWS is down. connection to region failed." % region) - sys.exit(1) + boto_paths = ['/etc/boto.cfg', '~/.boto', '~/.aws/credentials'] + boto_config_found = list(p for p in boto_paths if os.path.isfile(os.path.expanduser(p))) + if len(boto_config_found) > 0: + errors.append(" - Boto configs found at '%s', but the credentials contained may not be correct" % ', '.join(boto_config_found)) + else: + errors.append(" - No Boto config found at any expected location '%s'" % ', '.join(boto_paths)) + + return '\n'.join(errors) + + def fail_with_error(self, err_msg, err_operation=None): + '''log an error to std err for ansible-playbook to consume and exit''' + if err_operation: + err_msg = 'ERROR: "{err_msg}", while: {err_operation}'.format( + err_msg=err_msg, err_operation=err_operation) + sys.stderr.write(err_msg) + sys.exit(1) + + def get_instance(self, region, instance_id): + conn = self.connect(region) reservations = conn.get_all_instances([instance_id]) for reservation in reservations: @@ -414,8 +612,8 @@ class Ec2Inventory(object): ''' Adds an instance to the inventory and index, as long as it is addressable ''' - # Only want running instances unless all_instances is True - if not self.all_instances and instance.state != 'running': + # Only return instances with desired instance states + if instance.state not in self.ec2_instance_states: return # Select the best destination address @@ -502,18 +700,21 @@ class Ec2Inventory(object): if self.nested_groups: self.push_group(self.inventory, 'security_groups', key) except AttributeError: - print 'Package boto seems a bit older.' - print 'Please upgrade boto >= 2.3.0.' - sys.exit(1) + self.fail_with_error('\n'.join(['Package boto seems a bit older.', + 'Please upgrade boto >= 2.3.0.'])) # Inventory: Group by tag keys if self.group_by_tag_keys: - for k, v in instance.tags.iteritems(): - key = self.to_safe("tag_" + k + "=" + v) + for k, v in instance.tags.items(): + if v: + key = self.to_safe("tag_" + k + "=" + v) + else: + key = self.to_safe("tag_" + k) self.push(self.inventory, key, dest) if self.nested_groups: self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k)) - self.push_group(self.inventory, self.to_safe("tag_" + k), key) + if v: + self.push_group(self.inventory, self.to_safe("tag_" + k), key) # Inventory: Group by Route53 domain names if enabled if self.route53_enabled and self.group_by_route53_names: @@ -597,9 +798,9 @@ class Ec2Inventory(object): self.push_group(self.inventory, 'security_groups', key) except AttributeError: - print 'Package boto seems a bit older.' - print 'Please upgrade boto >= 2.3.0.' - sys.exit(1) + self.fail_with_error('\n'.join(['Package boto seems a bit older.', + 'Please upgrade boto >= 2.3.0.'])) + # Inventory: Group by engine if self.group_by_rds_engine: @@ -618,6 +819,243 @@ class Ec2Inventory(object): self.inventory["_meta"]["hostvars"][dest] = self.get_host_info_dict_from_instance(instance) + def add_elasticache_cluster(self, cluster, region): + ''' Adds an ElastiCache cluster to the inventory and index, as long as + it's nodes are addressable ''' + + # Only want available clusters unless all_elasticache_clusters is True + if not self.all_elasticache_clusters and cluster['CacheClusterStatus'] != 'available': + return + + # Select the best destination address + if 'ConfigurationEndpoint' in cluster and cluster['ConfigurationEndpoint']: + # Memcached cluster + dest = cluster['ConfigurationEndpoint']['Address'] + is_redis = False + else: + # Redis sigle node cluster + # Because all Redis clusters are single nodes, we'll merge the + # info from the cluster with info about the node + dest = cluster['CacheNodes'][0]['Endpoint']['Address'] + is_redis = True + + if not dest: + # Skip clusters we cannot address (e.g. private VPC subnet) + return + + # Add to index + self.index[dest] = [region, cluster['CacheClusterId']] + + # Inventory: Group by instance ID (always a group of 1) + if self.group_by_instance_id: + self.inventory[cluster['CacheClusterId']] = [dest] + if self.nested_groups: + self.push_group(self.inventory, 'instances', cluster['CacheClusterId']) + + # Inventory: Group by region + if self.group_by_region and not is_redis: + self.push(self.inventory, region, dest) + if self.nested_groups: + self.push_group(self.inventory, 'regions', region) + + # Inventory: Group by availability zone + if self.group_by_availability_zone and not is_redis: + self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest) + if self.nested_groups: + if self.group_by_region: + self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone']) + self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone']) + + # Inventory: Group by node type + if self.group_by_instance_type and not is_redis: + type_name = self.to_safe('type_' + cluster['CacheNodeType']) + self.push(self.inventory, type_name, dest) + if self.nested_groups: + self.push_group(self.inventory, 'types', type_name) + + # Inventory: Group by VPC (information not available in the current + # AWS API version for ElastiCache) + + # Inventory: Group by security group + if self.group_by_security_group and not is_redis: + + # Check for the existence of the 'SecurityGroups' key and also if + # this key has some value. When the cluster is not placed in a SG + # the query can return None here and cause an error. + if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None: + for security_group in cluster['SecurityGroups']: + key = self.to_safe("security_group_" + security_group['SecurityGroupId']) + self.push(self.inventory, key, dest) + if self.nested_groups: + self.push_group(self.inventory, 'security_groups', key) + + # Inventory: Group by engine + if self.group_by_elasticache_engine and not is_redis: + self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest) + if self.nested_groups: + self.push_group(self.inventory, 'elasticache_engines', self.to_safe(cluster['Engine'])) + + # Inventory: Group by parameter group + if self.group_by_elasticache_parameter_group: + self.push(self.inventory, self.to_safe("elasticache_parameter_group_" + cluster['CacheParameterGroup']['CacheParameterGroupName']), dest) + if self.nested_groups: + self.push_group(self.inventory, 'elasticache_parameter_groups', self.to_safe(cluster['CacheParameterGroup']['CacheParameterGroupName'])) + + # Inventory: Group by replication group + if self.group_by_elasticache_replication_group and 'ReplicationGroupId' in cluster and cluster['ReplicationGroupId']: + self.push(self.inventory, self.to_safe("elasticache_replication_group_" + cluster['ReplicationGroupId']), dest) + if self.nested_groups: + self.push_group(self.inventory, 'elasticache_replication_groups', self.to_safe(cluster['ReplicationGroupId'])) + + # Global Tag: all ElastiCache clusters + self.push(self.inventory, 'elasticache_clusters', cluster['CacheClusterId']) + + host_info = self.get_host_info_dict_from_describe_dict(cluster) + + self.inventory["_meta"]["hostvars"][dest] = host_info + + # Add the nodes + for node in cluster['CacheNodes']: + self.add_elasticache_node(node, cluster, region) + + def add_elasticache_node(self, node, cluster, region): + ''' Adds an ElastiCache node to the inventory and index, as long as + it is addressable ''' + + # Only want available nodes unless all_elasticache_nodes is True + if not self.all_elasticache_nodes and node['CacheNodeStatus'] != 'available': + return + + # Select the best destination address + dest = node['Endpoint']['Address'] + + if not dest: + # Skip nodes we cannot address (e.g. private VPC subnet) + return + + node_id = self.to_safe(cluster['CacheClusterId'] + '_' + node['CacheNodeId']) + + # Add to index + self.index[dest] = [region, node_id] + + # Inventory: Group by node ID (always a group of 1) + if self.group_by_instance_id: + self.inventory[node_id] = [dest] + if self.nested_groups: + self.push_group(self.inventory, 'instances', node_id) + + # Inventory: Group by region + if self.group_by_region: + self.push(self.inventory, region, dest) + if self.nested_groups: + self.push_group(self.inventory, 'regions', region) + + # Inventory: Group by availability zone + if self.group_by_availability_zone: + self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest) + if self.nested_groups: + if self.group_by_region: + self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone']) + self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone']) + + # Inventory: Group by node type + if self.group_by_instance_type: + type_name = self.to_safe('type_' + cluster['CacheNodeType']) + self.push(self.inventory, type_name, dest) + if self.nested_groups: + self.push_group(self.inventory, 'types', type_name) + + # Inventory: Group by VPC (information not available in the current + # AWS API version for ElastiCache) + + # Inventory: Group by security group + if self.group_by_security_group: + + # Check for the existence of the 'SecurityGroups' key and also if + # this key has some value. When the cluster is not placed in a SG + # the query can return None here and cause an error. + if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None: + for security_group in cluster['SecurityGroups']: + key = self.to_safe("security_group_" + security_group['SecurityGroupId']) + self.push(self.inventory, key, dest) + if self.nested_groups: + self.push_group(self.inventory, 'security_groups', key) + + # Inventory: Group by engine + if self.group_by_elasticache_engine: + self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest) + if self.nested_groups: + self.push_group(self.inventory, 'elasticache_engines', self.to_safe("elasticache_" + cluster['Engine'])) + + # Inventory: Group by parameter group (done at cluster level) + + # Inventory: Group by replication group (done at cluster level) + + # Inventory: Group by ElastiCache Cluster + if self.group_by_elasticache_cluster: + self.push(self.inventory, self.to_safe("elasticache_cluster_" + cluster['CacheClusterId']), dest) + + # Global Tag: all ElastiCache nodes + self.push(self.inventory, 'elasticache_nodes', dest) + + host_info = self.get_host_info_dict_from_describe_dict(node) + + if dest in self.inventory["_meta"]["hostvars"]: + self.inventory["_meta"]["hostvars"][dest].update(host_info) + else: + self.inventory["_meta"]["hostvars"][dest] = host_info + + def add_elasticache_replication_group(self, replication_group, region): + ''' Adds an ElastiCache replication group to the inventory and index ''' + + # Only want available clusters unless all_elasticache_replication_groups is True + if not self.all_elasticache_replication_groups and replication_group['Status'] != 'available': + return + + # Select the best destination address (PrimaryEndpoint) + dest = replication_group['NodeGroups'][0]['PrimaryEndpoint']['Address'] + + if not dest: + # Skip clusters we cannot address (e.g. private VPC subnet) + return + + # Add to index + self.index[dest] = [region, replication_group['ReplicationGroupId']] + + # Inventory: Group by ID (always a group of 1) + if self.group_by_instance_id: + self.inventory[replication_group['ReplicationGroupId']] = [dest] + if self.nested_groups: + self.push_group(self.inventory, 'instances', replication_group['ReplicationGroupId']) + + # Inventory: Group by region + if self.group_by_region: + self.push(self.inventory, region, dest) + if self.nested_groups: + self.push_group(self.inventory, 'regions', region) + + # Inventory: Group by availability zone (doesn't apply to replication groups) + + # Inventory: Group by node type (doesn't apply to replication groups) + + # Inventory: Group by VPC (information not available in the current + # AWS API version for replication groups + + # Inventory: Group by security group (doesn't apply to replication groups) + # Check this value in cluster level + + # Inventory: Group by engine (replication groups are always Redis) + if self.group_by_elasticache_engine: + self.push(self.inventory, 'elasticache_redis', dest) + if self.nested_groups: + self.push_group(self.inventory, 'elasticache_engines', 'redis') + + # Global Tag: all ElastiCache clusters + self.push(self.inventory, 'elasticache_replication_groups', replication_group['ReplicationGroupId']) + + host_info = self.get_host_info_dict_from_describe_dict(replication_group) + + self.inventory["_meta"]["hostvars"][dest] = host_info def get_route53_records(self): ''' Get and store the map of resource records to domain names that @@ -666,7 +1104,6 @@ class Ec2Inventory(object): return list(name_list) - def get_host_info_dict_from_instance(self, instance): instance_vars = {} for key in vars(instance): @@ -683,7 +1120,7 @@ class Ec2Inventory(object): instance_vars['ec2_previous_state_code'] = instance.previous_state_code elif type(value) in [int, bool]: instance_vars[key] = value - elif type(value) in [str, unicode]: + elif isinstance(value, six.string_types): instance_vars[key] = value.strip() elif type(value) == type(None): instance_vars[key] = '' @@ -692,7 +1129,7 @@ class Ec2Inventory(object): elif key == 'ec2__placement': instance_vars['ec2_placement'] = value.zone elif key == 'ec2_tags': - for k, v in value.iteritems(): + for k, v in value.items(): key = self.to_safe('ec2_tag_' + k) instance_vars[key] = v elif key == 'ec2_groups': @@ -712,6 +1149,91 @@ class Ec2Inventory(object): return instance_vars + def get_host_info_dict_from_describe_dict(self, describe_dict): + ''' Parses the dictionary returned by the API call into a flat list + of parameters. This method should be used only when 'describe' is + used directly because Boto doesn't provide specific classes. ''' + + # I really don't agree with prefixing everything with 'ec2' + # because EC2, RDS and ElastiCache are different services. + # I'm just following the pattern used until now to not break any + # compatibility. + + host_info = {} + for key in describe_dict: + value = describe_dict[key] + key = self.to_safe('ec2_' + self.uncammelize(key)) + + # Handle complex types + + # Target: Memcached Cache Clusters + if key == 'ec2_configuration_endpoint' and value: + host_info['ec2_configuration_endpoint_address'] = value['Address'] + host_info['ec2_configuration_endpoint_port'] = value['Port'] + + # Target: Cache Nodes and Redis Cache Clusters (single node) + if key == 'ec2_endpoint' and value: + host_info['ec2_endpoint_address'] = value['Address'] + host_info['ec2_endpoint_port'] = value['Port'] + + # Target: Redis Replication Groups + if key == 'ec2_node_groups' and value: + host_info['ec2_endpoint_address'] = value[0]['PrimaryEndpoint']['Address'] + host_info['ec2_endpoint_port'] = value[0]['PrimaryEndpoint']['Port'] + replica_count = 0 + for node in value[0]['NodeGroupMembers']: + if node['CurrentRole'] == 'primary': + host_info['ec2_primary_cluster_address'] = node['ReadEndpoint']['Address'] + host_info['ec2_primary_cluster_port'] = node['ReadEndpoint']['Port'] + host_info['ec2_primary_cluster_id'] = node['CacheClusterId'] + elif node['CurrentRole'] == 'replica': + host_info['ec2_replica_cluster_address_'+ str(replica_count)] = node['ReadEndpoint']['Address'] + host_info['ec2_replica_cluster_port_'+ str(replica_count)] = node['ReadEndpoint']['Port'] + host_info['ec2_replica_cluster_id_'+ str(replica_count)] = node['CacheClusterId'] + replica_count += 1 + + # Target: Redis Replication Groups + if key == 'ec2_member_clusters' and value: + host_info['ec2_member_clusters'] = ','.join([str(i) for i in value]) + + # Target: All Cache Clusters + elif key == 'ec2_cache_parameter_group': + host_info["ec2_cache_node_ids_to_reboot"] = ','.join([str(i) for i in value['CacheNodeIdsToReboot']]) + host_info['ec2_cache_parameter_group_name'] = value['CacheParameterGroupName'] + host_info['ec2_cache_parameter_apply_status'] = value['ParameterApplyStatus'] + + # Target: Almost everything + elif key == 'ec2_security_groups': + + # Skip if SecurityGroups is None + # (it is possible to have the key defined but no value in it). + if value is not None: + sg_ids = [] + for sg in value: + sg_ids.append(sg['SecurityGroupId']) + host_info["ec2_security_group_ids"] = ','.join([str(i) for i in sg_ids]) + + # Target: Everything + # Preserve booleans and integers + elif type(value) in [int, bool]: + host_info[key] = value + + # Target: Everything + # Sanitize string values + elif isinstance(value, six.string_types): + host_info[key] = value.strip() + + # Target: Everything + # Replace None by an empty string + elif type(value) == type(None): + host_info[key] = '' + + else: + # Remove non-processed complex types + pass + + return host_info + def get_host_info(self): ''' Get variables about a specific host ''' @@ -775,13 +1297,16 @@ class Ec2Inventory(object): cache.write(json_data) cache.close() + def uncammelize(self, key): + temp = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', key) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', temp).lower() def to_safe(self, word): - ''' Converts 'bad' characters in a string to underscores so they can be - used as Ansible groups ''' - - return re.sub("[^A-Za-z0-9\-]", "_", word) - + ''' Converts 'bad' characters in a string to underscores so they can be used as Ansible groups ''' + regex = "[^A-Za-z0-9\_" + if not self.replace_dash_in_groups: + regex += "\-" + return re.sub(regex + "]", "_", word) def json_format_dict(self, data, pretty=False): ''' Converts a dict to a JSON object and dumps it as a formatted diff --git a/inventory/byo/hosts.example b/inventory/byo/hosts.example index 56bbb9612..423581281 100644 --- a/inventory/byo/hosts.example +++ b/inventory/byo/hosts.example @@ -111,8 +111,17 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # set RPM version for debugging purposes #openshift_pkg_version=-3.0.0.0 -# Configure custom master certificates +# Configure custom named certificates +# NOTE: openshift_master_named_certificates is cached on masters and is an +# additive fact, meaning that each run with a different set of certificates +# will add the newly provided certificates to the cached set of certificates. +# If you would like openshift_master_named_certificates to be overwritten with +# the provided value, specify openshift_master_overwrite_named_certificates. +#openshift_master_overwrite_named_certificates: true +# +# Provide local certificate paths which will be deployed to masters #openshift_master_named_certificates=[{"certfile": "/path/to/custom1.crt", "keyfile": "/path/to/custom1.key"}] +# # Detected names may be overridden by specifying the "names" key #openshift_master_named_certificates=[{"certfile": "/path/to/custom1.crt", "keyfile": "/path/to/custom1.key", "names": ["public-master-host.com"]}] diff --git a/openshift-ansible.spec b/openshift-ansible.spec index 21f624400..fef7aab79 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -5,7 +5,7 @@ } Name: openshift-ansible -Version: 3.0.12 +Version: 3.0.14 Release: 1%{?dist} Summary: Openshift and Atomic Enterprise Ansible License: ASL 2.0 @@ -13,7 +13,8 @@ URL: https://github.com/openshift/openshift-ansible Source0: https://github.com/openshift/openshift-ansible/archive/%{commit}/%{name}-%{version}.tar.gz BuildArch: noarch -Requires: ansible +Requires: ansible >= 1.9.3 +Requires: python2 %description Openshift and Atomic Enterprise Ansible @@ -96,8 +97,9 @@ popd # ---------------------------------------------------------------------------------- %package bin Summary: Openshift and Atomic Enterprise Ansible Scripts for working with metadata hosts -Requires: %{name}-inventory -Requires: python2 +Requires: %{name} = %{version} +Requires: %{name}-inventory = %{version} +Requires: %{name}-playbooks = %{version} BuildRequires: python2-devel BuildArch: noarch @@ -117,7 +119,7 @@ Scripts to make it nicer when working with hosts that are defined only by metada # ---------------------------------------------------------------------------------- %package docs Summary: Openshift and Atomic Enterprise Ansible documents -Requires: %{name} +Requires: %{name} = %{version} BuildArch: noarch %description docs @@ -131,7 +133,7 @@ BuildArch: noarch # ---------------------------------------------------------------------------------- %package inventory Summary: Openshift and Atomic Enterprise Ansible Inventories -Requires: python2 +Requires: %{name} = %{version} BuildArch: noarch %description inventory @@ -144,7 +146,7 @@ Ansible Inventories used with the openshift-ansible scripts and playbooks. %package inventory-aws Summary: Openshift and Atomic Enterprise Ansible Inventories for AWS -Requires: %{name}-inventory +Requires: %{name}-inventory = %{version} Requires: python-boto BuildArch: noarch @@ -156,7 +158,7 @@ Ansible Inventories for AWS used with the openshift-ansible scripts and playbook %package inventory-gce Summary: Openshift and Atomic Enterprise Ansible Inventories for GCE -Requires: %{name}-inventory +Requires: %{name}-inventory = %{version} Requires: python-libcloud >= 0.13 BuildArch: noarch @@ -172,10 +174,10 @@ Ansible Inventories for GCE used with the openshift-ansible scripts and playbook # ---------------------------------------------------------------------------------- %package playbooks Summary: Openshift and Atomic Enterprise Ansible Playbooks -Requires: %{name} -Requires: %{name}-roles -Requires: %{name}-lookup-plugins -Requires: %{name}-filter-plugins +Requires: %{name} = %{version} +Requires: %{name}-roles = %{version} +Requires: %{name}-lookup-plugins = %{version} +Requires: %{name}-filter-plugins = %{version} BuildArch: noarch %description playbooks @@ -191,8 +193,8 @@ BuildArch: noarch %package roles Summary: Openshift and Atomic Enterprise Ansible roles Requires: %{name} -Requires: %{name}-lookup-plugins -Requires: %{name}-filter-plugins +Requires: %{name}-lookup-plugins = %{version} +Requires: %{name}-filter-plugins = %{version} BuildArch: noarch %description roles @@ -238,9 +240,7 @@ BuildArch: noarch %package -n atomic-openshift-utils Summary: Atomic OpenShift Utilities BuildRequires: python-setuptools -Requires: openshift-ansible-playbooks -Requires: openshift-ansible-roles -Requires: ansible +Requires: %{name}-playbooks >= %{version} Requires: python-click Requires: python-setuptools Requires: PyYAML @@ -258,6 +258,77 @@ Atomic OpenShift Utilities includes %changelog +* Thu Nov 19 2015 Brenton Leanhardt <bleanhar@redhat.com> 3.0.14-1 +- added metric items to zabbix for openshift online (mwoodson@redhat.com) +- Updating usergroups to accept users (kwoodson@redhat.com) +- Differentiate machine types on GCE (master and nodes) + (romain.dossin@amadeus.com) +- Uninstall - Remove systemd wants file for node (jdetiber@redhat.com) +- ec2 - force !requiretty for ssh_user (jdetiber@redhat.com) +- small tweaks for adding docker volume for aws master hosts + (jdetiber@redhat.com) +- Created role to deploy ops host monitoring (jdiaz@redhat.com) +- Update certificate paths when 'names' key is provided. (abutcher@redhat.com) +- add a volume on master host, in AWS provisioning (chengcheng.mu@amadeus.com) +- First attempt at adding web scenarios (kwoodson@redhat.com) +- Use field numbers for all formats in bin/cluster for python 2.6 + (abutcher@redhat.com) +- atomic-openshift-installer: Correct single master case (smunilla@redhat.com) +- added copr-openshift-ansible releaser, removed old rel-eng stuff. + (twiest@redhat.com) +- changed counter -> count (mwoodson@redhat.com) +- Updating zbx_item classes to support data types for bool. + (kwoodson@redhat.com) +- Fix ec2 instance type override (jdetiber@redhat.com) +- updated my check to support the boolean data type (mwoodson@redhat.com) +- Add additive_facts_to_overwrite instead of overwriting all additive_facts + (abutcher@redhat.com) +- added healthz check and more pod count checks (mwoodson@redhat.com) +- updating to the latest ec2.py (and re-patching with our changes). + (twiest@redhat.com) +- atomic-openshift-installer: Temporarily restrict to single master + (smunilla@redhat.com) +- openshift-ansible: Correct variable (smunilla@redhat.com) +- Refactor named certificates. (abutcher@redhat.com) +- atomic-openshift-utils: Version lock playbooks (smunilla@redhat.com) +- Add the native ha services and configs to uninstall (jdetiber@redhat.com) +- Bug 1282336 - Add additional seboolean for gluster (jdetiber@redhat.com) +- Raise lifetime to 2 weeks for dynamic AWS items (jdiaz@redhat.com) +- bin/cluster fix python 2.6 issue (jdetiber@redhat.com) +- cluster list: break host types by subtype (lhuard@amadeus.com) +- README_AWS: Add needed dependency (c.witt.1900@gmail.com) +- Fix invalid sudo command test (takayoshi@gmail.com) +- Docs: Fedora: Add missing dependencies and update to dnf. (public@omeid.me) +- Gate upgrade steps for 3.0 to 3.1 upgrade (jdetiber@redhat.com) +- added the tito and copr_cli roles (twiest@redhat.com) +- pylint openshift_facts (jdetiber@redhat.com) +- Update etcd default facts setting (jdetiber@redhat.com) +- Update master facts prior to upgrading incase facts are missing. + (abutcher@redhat.com) +- pre-upgrade-check: differentiates between port and targetPort in output + (smilner@redhat.com) +- Better structure the output of the list playbook (lhuard@amadeus.com) +- Add the sub-host-type tag to the libvirt VMs (lhuard@amadeus.com) +- atomic-openshift-installer: Update nopwd sudo test (smunilla@redhat.com) +- Fix pylint import errors for utils/test/. (dgoodwin@redhat.com) +- atomic-openshift-installer: Update prompts and help messages + (smunilla@redhat.com) +- Dependencies need to be added when a create occurs on SLA object. + (kwoodson@redhat.com) +- Test additions for cli_installer:get_hosts_to_install_on + (bleanhar@redhat.com) +- adding itservice (kwoodson@redhat.com) +- remove netaddr dependency (tob@butter.sh) +- Add pyOpenSSL to dependencies for Fedora. (public@omeid.me) +- Vagrant RHEL registration cleanup (pep@redhat.com) +- RH subscription: optional satellite and pkg update (pep@redhat.com) + +* Tue Nov 17 2015 Brenton Leanhardt <bleanhar@redhat.com> 3.0.13-1 +- The aep3 images changed locations. (bleanhar@redhat.com) +- atomic-openshift-installer: Correct single master case (smunilla@redhat.com) +- atomic-openshift-installer: Temporarily restrict to single master + (smunilla@redhat.com) + * Wed Nov 11 2015 Brenton Leanhardt <bleanhar@redhat.com> 3.0.12-1 - Sync with the latest image streams (sdodson@redhat.com) diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index e0dbad900..1a55eb053 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -103,7 +103,7 @@ - shell: find /var/lib/openshift/openshift.local.volumes -type d -exec umount {} \; 2>/dev/null || true changed_when: False - - shell: docker rm -f "{{ item }}"-master "{{ item }}"-node + - shell: docker rm -f "{{ item }}"-master "{{ item }}"-node changed_when: False failed_when: False with_items: @@ -152,11 +152,16 @@ - /etc/sysconfig/atomic-enterprise-master - /etc/sysconfig/atomic-enterprise-node - /etc/sysconfig/atomic-openshift-master + - /etc/sysconfig/atomic-openshift-master-api + - /etc/sysconfig/atomic-openshift-master-controllers - /etc/sysconfig/atomic-openshift-node - /etc/sysconfig/openshift-master - /etc/sysconfig/openshift-node - /etc/sysconfig/origin-master + - /etc/sysconfig/origin-master-api + - /etc/sysconfig/origin-master-controllers - /etc/sysconfig/origin-node + - /etc/systemd/system/atomic-openshift-node.service.wants - /root/.kube - /run/openshift-sdn - /usr/share/openshift/examples @@ -165,6 +170,16 @@ - /var/lib/openshift - /var/lib/origin - /var/lib/pacemaker + - /usr/lib/systemd/system/atomic-openshift-master-api.service + - /usr/lib/systemd/system/atomic-openshift-master-controllers.service + - /usr/lib/systemd/system/origin-master-api.service + - /usr/lib/systemd/system/origin-master-controllers.service + + # Since we are potentially removing the systemd unit files for separated + # master-api and master-controllers services, so we need to reload the + # systemd configuration manager + - name: Reload systemd manager configuration + command: systemctl daemon-reload - name: restart docker service: name=docker state=restarted diff --git a/playbooks/aws/openshift-cluster/tasks/launch_instances.yml b/playbooks/aws/openshift-cluster/tasks/launch_instances.yml index 9c699120b..22c617fea 100644 --- a/playbooks/aws/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/aws/openshift-cluster/tasks/launch_instances.yml @@ -20,10 +20,6 @@ | default(deployment_vars[deployment_type].image, true) }}" when: ec2_image is not defined and not ec2_image_name - set_fact: - ec2_instance_type: "{{ lookup('env', 'ec2_instance_type') - | default(deployment_vars[deployment_type].type, true) }}" - when: ec2_instance_type is not defined -- set_fact: ec2_keypair: "{{ lookup('env', 'ec2_keypair') | default(deployment_vars[deployment_type].keypair, true) }}" when: ec2_keypair is not defined @@ -37,25 +33,25 @@ when: ec2_assign_public_ip is not defined - set_fact: - ec2_instance_type: "{{ ec2_master_instance_type | default(deployment_vars[deployment_type].type, true) }}" + ec2_instance_type: "{{ ec2_master_instance_type | default(lookup('env', 'ec2_master_instance_type') | default(lookup('env', 'ec2_instance_type') | default(deployment_vars[deployment_type].type))) }}" ec2_security_groups: "{{ ec2_master_security_groups | default(deployment_vars[deployment_type].security_groups, true) }}" when: host_type == "master" and sub_host_type == "default" - set_fact: - ec2_instance_type: "{{ ec2_etcd_instance_type | default(deployment_vars[deployment_type].type, true) }}" + ec2_instance_type: "{{ ec2_etcd_instance_type | default(lookup('env', 'ec2_etcd_instance_type') | default(lookup('env', 'ec2_instance_type') | default(deployment_vars[deployment_type].type))) }}" ec2_security_groups: "{{ ec2_etcd_security_groups | default(deployment_vars[deployment_type].security_groups, true)}}" when: host_type == "etcd" and sub_host_type == "default" - set_fact: - ec2_instance_type: "{{ ec2_infra_instance_type | default(deployment_vars[deployment_type].type, true) }}" + ec2_instance_type: "{{ ec2_infra_instance_type | default(lookup('env', 'ec2_infra_instance_type') | default(lookup('env', 'ec2_instance_type') | default(deployment_vars[deployment_type].type))) }}" ec2_security_groups: "{{ ec2_infra_security_groups | default(deployment_vars[deployment_type].security_groups, true) }}" when: host_type == "node" and sub_host_type == "infra" - set_fact: - ec2_instance_type: "{{ ec2_node_instance_type | default(deployment_vars[deployment_type].type, true) }}" + ec2_instance_type: "{{ ec2_node_instance_type | default(lookup('env', 'ec2_node_instance_type') | default(lookup('env', 'ec2_instance_type') | default(deployment_vars[deployment_type].type))) }}" ec2_security_groups: "{{ ec2_node_security_groups | default(deployment_vars[deployment_type].security_groups, true) }}" when: host_type == "node" and sub_host_type == "compute" @@ -81,7 +77,6 @@ - set_fact: latest_ami: "{{ ami_result.results | oo_ami_selector(ec2_image_name) }}" - user_data: "{{ lookup('template', '../templates/user_data.j2') }}" volume_defs: etcd: root: @@ -97,6 +92,10 @@ volume_size: "{{ lookup('env', 'os_master_root_vol_size') | default(25, true) }}" device_type: "{{ lookup('env', 'os_master_root_vol_type') | default('gp2', true) }}" iops: "{{ lookup('env', 'os_master_root_vol_iops') | default(500, true) }}" + docker: + volume_size: "{{ lookup('env', 'os_docker_vol_size') | default(10, true) }}" + device_type: "{{ lookup('env', 'os_docker_vol_type') | default('gp2', true) }}" + iops: "{{ lookup('env', 'os_docker_vol_iops') | default(500, true) }}" node: root: volume_size: "{{ lookup('env', 'os_node_root_vol_size') | default(85, true) }}" @@ -121,7 +120,7 @@ count: "{{ instances | length }}" vpc_subnet_id: "{{ ec2_vpc_subnet | default(omit, true) }}" assign_public_ip: "{{ ec2_assign_public_ip | default(omit, true) }}" - user_data: "{{ user_data }}" + user_data: "{{ lookup('template', '../templates/user_data.j2') }}" wait: yes instance_tags: created-by: "{{ created_by }}" diff --git a/playbooks/aws/openshift-cluster/templates/user_data.j2 b/playbooks/aws/openshift-cluster/templates/user_data.j2 index 82c2f4d57..3621a7d7d 100644 --- a/playbooks/aws/openshift-cluster/templates/user_data.j2 +++ b/playbooks/aws/openshift-cluster/templates/user_data.j2 @@ -1,5 +1,5 @@ #cloud-config -{% if type =='etcd' %} +{% if type == 'etcd' and 'etcd' in volume_defs[type] %} cloud_config_modules: - disk_setup - mounts @@ -19,7 +19,7 @@ fs_setup: partition: auto {% endif %} -{% if type == 'node' %} +{% if type in ['node', 'master'] and 'docker' in volume_defs[type] %} mounts: - [ xvdb ] - [ ephemeral0 ] @@ -43,3 +43,10 @@ growpart: runcmd: - xfs_growfs /var {% endif %} + +{% if deployment_vars[deployment_type].sudo %} +- path: /etc/sudoers.d/99-{{ deployment_vars[deployment_type].ssh_user }}-cloud-init-requiretty + permissions: 440 + content: | + Defaults:{{ deployment_vars[deployment_type].ssh_user }} !requiretty +{% endif %} diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index b1da85d5d..ff1579218 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -204,14 +204,6 @@ validate_checksum: yes with_items: masters_needing_certs -- name: Inspect named certificates - hosts: oo_first_master - tasks: - - name: Collect certificate names - set_fact: - parsed_named_certificates: "{{ openshift_master_named_certificates | oo_parse_certificate_names(master_cert_config_dir, openshift.common.internal_hostnames) }}" - when: openshift_master_named_certificates is defined - - name: Compute haproxy_backend_servers hosts: localhost connection: local @@ -272,11 +264,55 @@ | map(attribute='stdout') | list) }}" +- name: Parse named certificates + hosts: localhost + vars: + internal_hostnames: "{{ hostvars[groups.oo_first_master.0].openshift.common.internal_hostnames }}" + named_certificates: "{{ hostvars[groups.oo_first_master.0].openshift_master_named_certificates | default([]) }}" + named_certificates_dir: "{{ hostvars[groups.oo_first_master.0].master_cert_config_dir }}/named_certificates/" + tasks: + - set_fact: + parsed_named_certificates: "{{ named_certificates | oo_parse_named_certificates(named_certificates_dir, internal_hostnames) }}" + when: named_certificates | length > 0 + +- name: Deploy named certificates + hosts: oo_masters_to_config + vars: + named_certs_dir: "{{ master_cert_config_dir }}/named_certificates/" + named_certs_specified: "{{ openshift_master_named_certificates is defined }}" + overwrite_named_certs: "{{ openshift_master_overwrite_named_certificates | default(false) }}" + roles: + - role: openshift_facts + post_tasks: + - openshift_facts: + role: master + local_facts: + named_certificates: "{{ hostvars.localhost.parsed_named_certificates | default([]) }}" + additive_facts_to_overwrite: + - "{{ 'master.named_certificates' if overwrite_named_certs | bool else omit }}" + - name: Clear named certificates + file: + path: "{{ named_certs_dir }}" + state: absent + when: overwrite_named_certs | bool + - name: Ensure named certificate directory exists + file: + path: "{{ named_certs_dir }}" + state: directory + when: named_certs_specified | bool + - name: Land named certificates + copy: src="{{ item.certfile }}" dest="{{ named_certs_dir }}" + with_items: openshift_master_named_certificates + when: named_certs_specified | bool + - name: Land named certificate keys + copy: src="{{ item.keyfile }}" dest="{{ named_certs_dir }}" + with_items: openshift_master_named_certificates + when: named_certs_specified | bool + - name: Configure master instances hosts: oo_masters_to_config serial: 1 vars: - named_certificates: "{{ hostvars[groups['oo_first_master'][0]]['parsed_named_certificates'] | default([])}}" sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}" openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}" openshift_master_count: "{{ groups.oo_masters_to_config | length }}" diff --git a/playbooks/gce/openshift-cluster/launch.yml b/playbooks/gce/openshift-cluster/launch.yml index 8be5d53e7..d6ef57c45 100644 --- a/playbooks/gce/openshift-cluster/launch.yml +++ b/playbooks/gce/openshift-cluster/launch.yml @@ -16,6 +16,8 @@ cluster: "{{ cluster_id }}" type: "{{ k8s_type }}" g_sub_host_type: "default" + gce_machine_type: "{{ lookup('env', 'gce_machine_master_type') | default(lookup('env', 'gce_machine_type'), true) }}" + gce_machine_image: "{{ lookup('env', 'gce_machine_master_image') | default(lookup('env', 'gce_machine_image'), true) }}" - include: ../../common/openshift-cluster/tasks/set_node_launch_facts.yml vars: @@ -27,6 +29,8 @@ cluster: "{{ cluster_id }}" type: "{{ k8s_type }}" g_sub_host_type: "{{ sub_host_type }}" + gce_machine_type: "{{ lookup('env', 'gce_machine_node_type') | default(lookup('env', 'gce_machine_type'), true) }}" + gce_machine_image: "{{ lookup('env', 'gce_machine_node_image') | default(lookup('env', 'gce_machine_image'), true) }}" - include: ../../common/openshift-cluster/tasks/set_node_launch_facts.yml vars: diff --git a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml index c428cb465..de8a75b18 100644 --- a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml @@ -5,8 +5,8 @@ - name: Launch instance(s) gce: instance_names: "{{ instances }}" - machine_type: "{{ lookup('env', 'gce_machine_type') | default('n1-standard-1', true) }}" - image: "{{ lookup('env', 'gce_machine_image') | default(deployment_vars[deployment_type].image, true) }}" + machine_type: "{{ gce_machine_type | default(deployment_vars[deployment_type].machine_type, true) }}" + image: "{{ gce_machine_image | default(deployment_vars[deployment_type].image, true) }}" service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" project_id: "{{ lookup('env', 'gce_project_id') }}" diff --git a/playbooks/gce/openshift-cluster/vars.yml b/playbooks/gce/openshift-cluster/vars.yml index 6de007807..a8ce8eb22 100644 --- a/playbooks/gce/openshift-cluster/vars.yml +++ b/playbooks/gce/openshift-cluster/vars.yml @@ -5,13 +5,16 @@ sdn_network_plugin: redhat/openshift-ovs-subnet deployment_vars: origin: image: preinstalled-slave-50g-v5 + machine_type: n1-standard-1 ssh_user: root sudo: yes online: image: libra-rhel7 + machine_type: n1-standard-1 ssh_user: root sudo: no enterprise: image: rhel-7 + machine_type: n1-standard-1 ssh_user: sudo: yes diff --git a/rel-eng/packages/.readme b/rel-eng/packages/.readme deleted file mode 100644 index 8999c8dbc..000000000 --- a/rel-eng/packages/.readme +++ /dev/null @@ -1,3 +0,0 @@ -the rel-eng/packages directory contains metadata files -named after their packages. Each file has the latest tagged -version and the project's relative directory. diff --git a/rel-eng/packages/openshift-ansible-bin b/rel-eng/packages/openshift-ansible-bin deleted file mode 100644 index 11c2906f0..000000000 --- a/rel-eng/packages/openshift-ansible-bin +++ /dev/null @@ -1 +0,0 @@ -0.0.19-1 bin/ diff --git a/rel-eng/packages/openshift-ansible-inventory b/rel-eng/packages/openshift-ansible-inventory deleted file mode 100644 index 733c626cf..000000000 --- a/rel-eng/packages/openshift-ansible-inventory +++ /dev/null @@ -1 +0,0 @@ -0.0.9-1 inventory/ diff --git a/rel-eng/tito.props b/rel-eng/tito.props deleted file mode 100644 index eab3f190d..000000000 --- a/rel-eng/tito.props +++ /dev/null @@ -1,5 +0,0 @@ -[buildconfig] -builder = tito.builder.Builder -tagger = tito.tagger.VersionTagger -changelog_do_not_remove_cherrypick = 0 -changelog_format = %s (%ae) diff --git a/roles/lib_zabbix/library/zbx_httptest.py b/roles/lib_zabbix/library/zbx_httptest.py new file mode 100644 index 000000000..96733b3d1 --- /dev/null +++ b/roles/lib_zabbix/library/zbx_httptest.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python +''' + Ansible module for zabbix httpservice +''' +# vim: expandtab:tabstop=4:shiftwidth=4 +# +# Zabbix item ansible module +# +# +# Copyright 2015 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is in place because each module looks similar to each other. +# These need duplicate code as their behavior is very similar +# but different for each zabbix class. +# pylint: disable=duplicate-code + +# pylint: disable=import-error +from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection + +def exists(content, key='result'): + ''' Check if key exists in content or the size of content[key] > 0 + ''' + if not content.has_key(key): + return False + + if not content[key]: + return False + + return True + +def get_authentication_method(auth): + ''' determine authentication type''' + rval = 0 + if 'basic' in auth: + rval = 1 + elif 'ntlm' in auth: + rval = 2 + + return rval + +def get_verify_host(verify): + ''' + get the values for verify_host + ''' + if verify: + return 1 + + return 0 + +def get_app_id(zapi, application): + ''' + get related templates + ''' + # Fetch templates by name + content = zapi.get_content('application', + 'get', + {'search': {'name': application}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + return content['result'][0]['applicationid'] + + return None + +def get_template_id(zapi, template_name): + ''' + get related templates + ''' + # Fetch templates by name + content = zapi.get_content('template', + 'get', + {'search': {'host': template_name}, + 'selectApplications': ['applicationid', 'name']}) + if content.has_key('result'): + return content['result'][0]['templateid'] + + return None + +def get_host_id_by_name(zapi, host_name): + '''Get host id by name''' + content = zapi.get_content('host', + 'get', + {'filter': {'name': host_name}}) + + return content['result'][0]['hostid'] + +def get_status(status): + ''' Determine the status of the web scenario ''' + rval = 0 + if 'disabled' in status: + return 1 + + return rval + +def find_step(idx, step_list): + ''' find step by index ''' + for step in step_list: + if str(step['no']) == str(idx): + return step + + return None + +def steps_equal(zab_steps, user_steps): + '''compare steps returned from zabbix + and steps passed from user + ''' + + if len(user_steps) != len(zab_steps): + return False + + for idx in range(1, len(user_steps)+1): + + user = find_step(idx, user_steps) + zab = find_step(idx, zab_steps) + + for key, value in user.items(): + if str(value) != str(zab[key]): + return False + + return True + +# The branches are needed for CRUD and error handling +# pylint: disable=too-many-branches +def main(): + ''' + ansible zabbix module for zbx_item + ''' + + module = AnsibleModule( + argument_spec=dict( + zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), + zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'), + zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'), + zbx_debug=dict(default=False, type='bool'), + name=dict(default=None, require=True, type='str'), + agent=dict(default=None, type='str'), + template_name=dict(default=None, type='str'), + host_name=dict(default=None, type='str'), + interval=dict(default=60, type='int'), + application=dict(default=None, type='str'), + authentication=dict(default=None, type='str'), + http_user=dict(default=None, type='str'), + http_password=dict(default=None, type='str'), + state=dict(default='present', type='str'), + status=dict(default='enabled', type='str'), + steps=dict(default='present', type='list'), + verify_host=dict(default=False, type='bool'), + retries=dict(default=1, type='int'), + headers=dict(default=None, type='dict'), + query_type=dict(default='filter', choices=['filter', 'search'], type='str'), + ), + #supports_check_mode=True + mutually_exclusive=[['template_name', 'host_name']], + ) + + zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'], + module.params['zbx_user'], + module.params['zbx_password'], + module.params['zbx_debug'])) + + #Set the instance and the template for the rest of the calls + zbx_class_name = 'httptest' + state = module.params['state'] + hostid = None + + # If a template name was passed then accept the template + if module.params['template_name']: + hostid = get_template_id(zapi, module.params['template_name']) + else: + hostid = get_host_id_by_name(zapi, module.params['host_name']) + + # Fail if a template was not found matching the name + if not hostid: + module.exit_json(failed=True, + changed=False, + results='Error: Could find template or host with name [%s].' % + (module.params.get('template_name', module.params['host_name'])), + state="Unkown") + + content = zapi.get_content(zbx_class_name, + 'get', + {module.params['query_type']: {'name': module.params['name']}, + 'selectSteps': 'extend', + }) + + #******# + # GET + #******# + if state == 'list': + module.exit_json(changed=False, results=content['result'], state="list") + + #******# + # DELETE + #******# + if state == 'absent': + if not exists(content): + module.exit_json(changed=False, state="absent") + + content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0]['httptestid']]) + module.exit_json(changed=True, results=content['result'], state="absent") + + # Create and Update + if state == 'present': + + params = {'name': module.params['name'], + 'hostid': hostid, + 'agent': module.params['agent'], + 'retries': module.params['retries'], + 'steps': module.params['steps'], + 'applicationid': get_app_id(zapi, module.params['application']), + 'delay': module.params['interval'], + 'verify_host': get_verify_host(module.params['verify_host']), + 'status': get_status(module.params['status']), + 'headers': module.params['headers'], + 'http_user': module.params['http_user'], + 'http_password': module.params['http_password'], + } + + + # Remove any None valued params + _ = [params.pop(key, None) for key in params.keys() if params[key] is None] + + #******# + # CREATE + #******# + if not exists(content): + content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state='present') + + + ######## + # UPDATE + ######## + differences = {} + zab_results = content['result'][0] + for key, value in params.items(): + + if key == 'steps': + if not steps_equal(zab_results[key], value): + differences[key] = value + + elif zab_results[key] != value and zab_results[key] != str(value): + differences[key] = value + + # We have differences and need to update + if not differences: + module.exit_json(changed=False, results=zab_results, state="present") + + differences['httptestid'] = zab_results['httptestid'] + content = zapi.get_content(zbx_class_name, 'update', differences) + + if content.has_key('error'): + module.exit_json(failed=True, changed=False, results=content['error'], state="present") + + module.exit_json(changed=True, results=content['result'], state="present") + + module.exit_json(failed=True, + changed=False, + results='Unknown state passed. %s' % state, + state="unknown") + +# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled +# import module snippets. This are required +from ansible.module_utils.basic import * + +main() diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 5dc3cff9b..996c98fae 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -41,6 +41,24 @@ def exists(content, key='result'): return True +def get_data_type(data_type): + ''' + Possible values: + 0 - decimal; + 1 - octal; + 2 - hexadecimal; + 3 - bool; + ''' + vtype = 0 + if 'octal' in data_type: + vtype = 1 + elif 'hexadecimal' in data_type: + vtype = 2 + elif 'bool' in data_type: + vtype = 3 + + return vtype + def get_value_type(value_type): ''' Possible values: @@ -158,6 +176,7 @@ def main(): template_name=dict(default=None, type='str'), zabbix_type=dict(default='trapper', type='str'), value_type=dict(default='int', type='str'), + data_type=dict(default='decimal', type='str'), interval=dict(default=60, type='int'), delta=dict(default=0, type='int'), multiplier=dict(default=None, type='str'), @@ -219,6 +238,7 @@ def main(): 'hostid': templateid[0], 'type': get_zabbix_type(module.params['zabbix_type']), 'value_type': get_value_type(module.params['value_type']), + 'data_type': get_data_type(module.params['data_type']), 'applications': get_app_ids(module.params['applications'], app_name_ids), 'formula': formula, 'multiplier': use_multiplier, diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 43498c015..aca9c8336 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -116,6 +116,24 @@ def get_zabbix_type(ztype): return _vtype +def get_data_type(data_type): + ''' + Possible values: + 0 - decimal; + 1 - octal; + 2 - hexadecimal; + 3 - bool; + ''' + vtype = 0 + if 'octal' in data_type: + vtype = 1 + elif 'hexadecimal' in data_type: + vtype = 2 + elif 'bool' in data_type: + vtype = 3 + + return vtype + def get_value_type(value_type): ''' Possible values: @@ -175,6 +193,7 @@ def main(): interfaceid=dict(default=None, type='int'), zabbix_type=dict(default='trapper', type='str'), value_type=dict(default='float', type='str'), + data_type=dict(default='decimal', type='str'), delay=dict(default=60, type='int'), lifetime=dict(default=30, type='int'), state=dict(default='present', type='str'), @@ -238,6 +257,7 @@ def main(): 'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']), 'type': get_zabbix_type(module.params['zabbix_type']), 'value_type': get_value_type(module.params['value_type']), + 'data_type': get_data_type(module.params['data_type']), 'applications': get_app_ids(zapi, module.params['applications'], template['templateid']), 'formula': formula, 'multiplier': use_multiplier, diff --git a/roles/lib_zabbix/library/zbx_usergroup.py b/roles/lib_zabbix/library/zbx_usergroup.py index 297d8ef91..3fd44d80c 100644 --- a/roles/lib_zabbix/library/zbx_usergroup.py +++ b/roles/lib_zabbix/library/zbx_usergroup.py @@ -27,6 +27,10 @@ zabbix ansible module for usergroups # but different for each zabbix class. # pylint: disable=duplicate-code +# Disabling too-many-branches as we need the error checking and the if-statements +# to determine the proper state +# pylint: disable=too-many-branches + # pylint: disable=import-error from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection @@ -92,26 +96,24 @@ def get_user_status(status): return 1 -#def get_userids(zapi, users): -# ''' Get userids from user aliases -# ''' -# if not users: -# return None -# -# userids = [] -# for alias in users: -# content = zapi.get_content('user', 'get', {'search': {'alias': alias}}) -# if content['result']: -# userids.append(content['result'][0]['userid']) -# -# return userids +def get_userids(zapi, users): + ''' Get userids from user aliases + ''' + if not users: + return None + + userids = [] + for alias in users: + content = zapi.get_content('user', 'get', {'search': {'alias': alias}}) + if content['result']: + userids.append(content['result'][0]['userid']) + + return userids def main(): ''' Ansible module for usergroup ''' - ##def usergroup(self, name, rights=None, users=None, state='present', params=None): - module = AnsibleModule( argument_spec=dict( zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'), @@ -123,7 +125,7 @@ def main(): status=dict(default='enabled', type='str'), name=dict(default=None, type='str', required=True), rights=dict(default=None, type='list'), - #users=dict(default=None, type='list'), + users=dict(default=None, type='list'), state=dict(default='present', type='str'), ), #supports_check_mode=True @@ -144,9 +146,15 @@ def main(): {'search': {'name': uname}, 'selectUsers': 'userid', }) + #******# + # GET + #******# if state == 'list': module.exit_json(changed=False, results=content['result'], state="list") + #******# + # DELETE + #******# if state == 'absent': if not exists(content): module.exit_json(changed=False, state="absent") @@ -157,6 +165,7 @@ def main(): content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]]) module.exit_json(changed=True, results=content['result'], state="absent") + # Create and Update if state == 'present': params = {'name': uname, @@ -164,26 +173,37 @@ def main(): 'users_status': get_user_status(module.params['status']), 'gui_access': get_gui_access(module.params['gui_access']), 'debug_mode': get_debug_mode(module.params['debug_mode']), - #'userids': get_userids(zapi, module.params['users']), + 'userids': get_userids(zapi, module.params['users']), } + # Remove any None valued params _ = [params.pop(key, None) for key in params.keys() if params[key] == None] + #******# + # CREATE + #******# if not exists(content): # if we didn't find it, create it content = zapi.get_content(zbx_class_name, 'create', params) + + if content.has_key('error'): + module.exit_json(failed=True, changed=True, results=content['error'], state="present") + module.exit_json(changed=True, results=content['result'], state='present') - # already exists, we need to update it - # let's compare properties + + + ######## + # UPDATE + ######## differences = {} zab_results = content['result'][0] for key, value in params.items(): if key == 'rights': differences['rights'] = value - #elif key == 'userids' and zab_results.has_key('users'): - #if zab_results['users'] != value: - #differences['userids'] = value + elif key == 'userids' and zab_results.has_key('users'): + if zab_results['users'] != value: + differences['userids'] = value elif zab_results[key] != value and zab_results[key] != str(value): differences[key] = value diff --git a/roles/lib_zabbix/tasks/create_template.yml b/roles/lib_zabbix/tasks/create_template.yml index 44c4e6766..2992505bf 100644 --- a/roles/lib_zabbix/tasks/create_template.yml +++ b/roles/lib_zabbix/tasks/create_template.yml @@ -33,6 +33,7 @@ key: "{{ item.key }}" name: "{{ item.name | default(item.key, true) }}" value_type: "{{ item.value_type | default('int') }}" + data_type: "{{ item.data_type | default('decimal') }}" description: "{{ item.description | default('', True) }}" multiplier: "{{ item.multiplier | default('', True) }}" units: "{{ item.units | default('', True) }}" @@ -81,6 +82,7 @@ key: "{{ item.key }}" discoveryrule_key: "{{ item.discoveryrule_key }}" value_type: "{{ item.value_type }}" + data_type: "{{ item.data_type | default('decimal') }}" template_name: "{{ template.name }}" applications: "{{ item.applications }}" description: "{{ item.description | default('', True) }}" diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 091ba4e2b..6006bfa9d 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -651,7 +651,7 @@ def set_deployment_facts_if_unset(facts): if deployment_type in ['enterprise', 'online', 'openshift-enterprise']: registry_url = 'openshift3/ose-${component}:${version}' elif deployment_type == 'atomic-enterprise': - registry_url = 'aep3/aep-${component}:${version}' + registry_url = 'aep3_beta/aep-${component}:${version}' facts[role]['registry_url'] = registry_url if 'master' in facts: @@ -864,20 +864,38 @@ def apply_provider_facts(facts, provider_facts): return facts -def merge_facts(orig, new): +def merge_facts(orig, new, additive_facts_to_overwrite): """ Recursively merge facts dicts Args: orig (dict): existing facts new (dict): facts to update + + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] + Returns: dict: the merged facts """ + additive_facts = ['named_certificates'] facts = dict() for key, value in orig.iteritems(): if key in new: if isinstance(value, dict) and isinstance(new[key], dict): - facts[key] = merge_facts(value, new[key]) + relevant_additive_facts = [] + # Keep additive_facts_to_overwrite if key matches + for item in additive_facts_to_overwrite: + if '.' in item and item.startswith(key + '.'): + relevant_additive_facts.append(item) + facts[key] = merge_facts(value, new[key], relevant_additive_facts) + elif key in additive_facts and key not in [x.split('.')[-1] for x in additive_facts_to_overwrite]: + # Fact is additive so we'll combine orig and new. + if isinstance(value, list) and isinstance(new[key], list): + new_fact = [] + for item in copy.deepcopy(value) + copy.copy(new[key]): + if item not in new_fact: + new_fact.append(item) + facts[key] = new_fact else: facts[key] = copy.copy(new[key]) else: @@ -961,13 +979,15 @@ class OpenShiftFacts(object): role (str): role for setting local facts filename (str): local facts file to use local_facts (dict): local facts to set + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Raises: OpenShiftFactsUnsupportedRoleError: """ known_roles = ['common', 'master', 'node', 'master_sdn', 'node_sdn', 'dns', 'etcd'] - def __init__(self, role, filename, local_facts): + def __init__(self, role, filename, local_facts, additive_facts_to_overwrite=False): self.changed = False self.filename = filename if role not in self.known_roles: @@ -976,25 +996,27 @@ class OpenShiftFacts(object): ) self.role = role self.system_facts = ansible_facts(module) - self.facts = self.generate_facts(local_facts) + self.facts = self.generate_facts(local_facts, additive_facts_to_overwrite) - def generate_facts(self, local_facts): + def generate_facts(self, local_facts, additive_facts_to_overwrite): """ Generate facts Args: local_facts (dict): local_facts for overriding generated defaults + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Returns: dict: The generated facts """ - local_facts = self.init_local_facts(local_facts) + local_facts = self.init_local_facts(local_facts, additive_facts_to_overwrite) roles = local_facts.keys() defaults = self.get_defaults(roles) provider_facts = self.init_provider_facts() facts = apply_provider_facts(defaults, provider_facts) - facts = merge_facts(facts, local_facts) + facts = merge_facts(facts, local_facts, additive_facts_to_overwrite) facts['current_config'] = get_current_config(facts) facts = set_url_facts_if_unset(facts) facts = set_project_cfg_facts_if_unset(facts) @@ -1132,11 +1154,13 @@ class OpenShiftFacts(object): ) return provider_facts - def init_local_facts(self, facts=None): + def init_local_facts(self, facts=None, additive_facts_to_overwrite=False): """ Initialize the provider facts Args: facts (dict): local facts to set + additive_facts_to_overwrite (list): additive facts to overwrite in jinja + '.' notation ex: ['master.named_certificates'] Returns: dict: The result of merging the provided facts with existing @@ -1154,7 +1178,7 @@ class OpenShiftFacts(object): basestring): facts_to_set[arg] = module.from_json(facts_to_set[arg]) - new_local_facts = merge_facts(local_facts, facts_to_set) + new_local_facts = merge_facts(local_facts, facts_to_set, additive_facts_to_overwrite) for facts in new_local_facts.values(): keys_to_delete = [] for fact, value in facts.iteritems(): @@ -1184,6 +1208,7 @@ def main(): role=dict(default='common', required=False, choices=OpenShiftFacts.known_roles), local_facts=dict(default=None, type='dict', required=False), + additive_facts_to_overwrite=dict(default=[], type='list', required=False), ), supports_check_mode=True, add_file_common_args=True, @@ -1191,9 +1216,10 @@ def main(): role = module.params['role'] local_facts = module.params['local_facts'] + additive_facts_to_overwrite = module.params['additive_facts_to_overwrite'] fact_file = '/etc/ansible/facts.d/openshift.fact' - openshift_facts = OpenShiftFacts(role, fact_file, local_facts) + openshift_facts = OpenShiftFacts(role, fact_file, local_facts, additive_facts_to_overwrite) file_params = module.params.copy() file_params['path'] = fact_file diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index bb12a0a0f..2a37c06d9 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -27,9 +27,6 @@ corsAllowedOrigins: {% for custom_origin in openshift.master.custom_cors_origins | default("") %} - {{ custom_origin }} {% endfor %} -{% for name in (named_certificates | map(attribute='names')) | list | oo_flatten %} - - {{ name }} -{% endfor %} {% if 'disabled_features' in openshift.master %} disabledFeatures: {{ openshift.master.disabled_features | to_json }} {% endif %} @@ -144,9 +141,9 @@ servingInfo: keyFile: master.server.key maxRequestsInFlight: 500 requestTimeoutSeconds: 3600 -{% if named_certificates %} +{% if openshift.master.named_certificates %} namedCertificates: -{% for named_certificate in named_certificates %} +{% for named_certificate in openshift.master.named_certificates %} - certFile: {{ named_certificate['certfile'] }} keyFile: {{ named_certificate['keyfile'] }} names: diff --git a/roles/openshift_node/tasks/storage_plugins/glusterfs.yml b/roles/openshift_node/tasks/storage_plugins/glusterfs.yml index b812e81df..5cd4a6041 100644 --- a/roles/openshift_node/tasks/storage_plugins/glusterfs.yml +++ b/roles/openshift_node/tasks/storage_plugins/glusterfs.yml @@ -4,9 +4,14 @@ pkg: glusterfs-fuse state: installed -- name: Set seboolean to allow gluster storage plugin access from containers +- name: Set sebooleans to allow gluster storage plugin access from containers seboolean: - name: virt_use_fusefs + name: "{{ item }}" state: yes persistent: yes when: ansible_selinux and ansible_selinux.status == "enabled" + with_items: + - virt_use_fusefs + - virt_sandbox_use_fusefs + register: sebool_result + failed_when: "'state' not in sebool_result and 'msg' in sebool_result and 'SELinux boolean item does not exist' not in sebool_result.msg" diff --git a/roles/os_zabbix/vars/template_aws.yml b/roles/os_zabbix/vars/template_aws.yml index 0ed682128..57832a3fe 100644 --- a/roles/os_zabbix/vars/template_aws.yml +++ b/roles/os_zabbix/vars/template_aws.yml @@ -4,7 +4,7 @@ g_template_aws: zdiscoveryrules: - name: disc.aws key: disc.aws - lifetime: 1 + lifetime: 14 description: "Dynamically register AWS bucket info" zitemprototypes: diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 6defc4989..512adad4c 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,19 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.api.ping + description: "Verify that the Openshift API is up" + type: int + applications: + - Openshift Master + + - key: openshift.master.api.healthz + description: "Checks the healthz check of the master's api: https://master_host/healthz" + type: int + data_type: bool + applications: + - Openshift Master + - key: openshift.master.user.count description: Shows number of users in a cluster type: int @@ -24,8 +37,26 @@ g_template_openshift_master: type: int applications: - Openshift Master - - - key: openshift.project.counter + + - key: openshift.master.pod.user.running.count + description: Shows number of user pods running (non infrastructure pods) + type: int + applications: + - Openshift Master + + - key: openshift.master.pod.total.count + description: Shows total number of pods (running and non running) + type: int + applications: + - Openshift Master + + - key: openshift.master.node.count + description: Shows the total number of nodes found in the Openshift Cluster + type: int + applications: + - Openshift Master + + - key: openshift.project.count description: Shows number of projects on a cluster type: int applications: @@ -103,12 +134,87 @@ g_template_openshift_master: applications: - Openshift Etcd + - key: openshift.master.metric.ping + description: "This check verifies that the https://master/metrics check is alive and communicating properly." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + + - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99 + description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed." + type: int + applications: + - Openshift Master Metrics + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' priority: avg + - name: 'Openshift Master API health check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Openshift Master API PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: high + + - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' + priority: avg + - name: 'Openshift Master process not running on {HOST.NAME}' expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -125,7 +231,7 @@ g_template_openshift_master: priority: info - name: 'There are no projects running on {HOST.NAME}' - expression: '{Template Openshift Master:openshift.project.counter.last()}=0' + expression: '{Template Openshift Master:openshift.project.count.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info diff --git a/roles/oso_host_monitoring/README.md b/roles/oso_host_monitoring/README.md new file mode 100644 index 000000000..f1fa05adb --- /dev/null +++ b/roles/oso_host_monitoring/README.md @@ -0,0 +1,50 @@ +Role Name +========= + +Applies local host monitoring container(s). + +Requirements +------------ + +None. + +Role Variables +-------------- + +osohm_zagg_web_url: where to contact monitoring service +osohm_host_monitoring: name of host monitoring container +osohm_zagg_client: name of container with zabbix client +osohm_docker_registry_url: docker repository containing above containers +osohm_default_zagg_server_user: login info to zabbix server +osohm_default_zagg_password: password to zabbix server + +Dependencies +------------ + +None. + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + + - hosts: servers + roles: + - oso_host_monitoring + vars: + osohm_zagg_web_url: "https://..." + osohm_host_monitoring: "oso-rhel7-host-monitoring" + osohm_zagg_client: "oso-rhel7-zagg-client" + osohm_docker_registry_url: "docker-registry.example.com/mon/" + osohm_default_zagg_server_user: "zagg-client" + osohm_default_zagg_password: "secret" + +License +------- + +ASL 2.0 + +Author Information +------------------ + +OpenShift operations, Red Hat, Inc diff --git a/roles/oso_host_monitoring/defaults/main.yml b/roles/oso_host_monitoring/defaults/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/oso_host_monitoring/defaults/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/oso_host_monitoring/handlers/main.yml b/roles/oso_host_monitoring/handlers/main.yml new file mode 100644 index 000000000..7863ad15b --- /dev/null +++ b/roles/oso_host_monitoring/handlers/main.yml @@ -0,0 +1,12 @@ +--- +- name: "Restart the {{ osohm_host_monitoring }} service" + service: + name: "{{ osohm_host_monitoring }}" + state: restarted + enabled: yes + +- name: "Restart the {{ osohm_zagg_client }} service" + service: + name: "{{ osohm_zagg_client }}" + state: restarted + enabled: yes diff --git a/roles/oso_host_monitoring/meta/main.yml b/roles/oso_host_monitoring/meta/main.yml new file mode 100644 index 000000000..cce30c2db --- /dev/null +++ b/roles/oso_host_monitoring/meta/main.yml @@ -0,0 +1,8 @@ +--- +galaxy_info: + author: OpenShift + description: apply monitoring container(s). + company: Red Hat, Inc + license: ASL 2.0 + min_ansible_version: 1.2 +dependencies: [] diff --git a/roles/oso_host_monitoring/tasks/main.yml b/roles/oso_host_monitoring/tasks/main.yml new file mode 100644 index 000000000..6ddfa3dcb --- /dev/null +++ b/roles/oso_host_monitoring/tasks/main.yml @@ -0,0 +1,65 @@ +--- +- fail: + msg: "This playbook requires {{item}} to be set." + when: "{{ item }} is not defined or {{ item }} == ''" + with_items: + - osohm_zagg_web_url + - osohm_host_monitoring + - osohm_zagg_client + - osohm_docker_registry_url + - osohm_default_zagg_server_user + - osohm_default_zagg_server_password + +- name: create /etc/docker/ops + file: + path: /etc/docker/ops + state: directory + mode: 0770 + group: root + owner: root + +- name: Copy dockercfg to /etc/docker/ops + template: + src: docker-registry.ops.cfg.j2 + dest: /etc/docker/ops/.dockercfg + owner: root + group: root + mode: 0600 + +- name: "Copy {{ osohm_host_monitoring }} systemd file" + template: + src: "{{ osohm_host_monitoring }}.service.j2" + dest: "/etc/systemd/system/{{ osohm_host_monitoring }}.service" + owner: root + group: root + mode: 0644 + notify: + - "Restart the {{ osohm_host_monitoring }} service" + register: systemd_host_monitoring + +- name: "Copy {{ osohm_zagg_client }} systemd file" + template: + src: "{{ osohm_zagg_client }}.service.j2" + dest: "/etc/systemd/system/{{ osohm_zagg_client }}.service" + owner: root + group: root + mode: 0644 + notify: + - "Restart the {{ osohm_zagg_client }} service" + register: zagg_systemd + +- name: reload systemd + command: /usr/bin/systemctl --system daemon-reload + when: systemd_host_monitoring | changed or zagg_systemd | changed + +- name: "Start the {{ osohm_host_monitoring }} service" + service: + name: "{{ osohm_host_monitoring }}" + state: started + enabled: yes + +- name: "Start the {{ osohm_zagg_client }} service" + service: + name: "{{ osohm_zagg_client }}" + state: started + enabled: yes diff --git a/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 new file mode 100644 index 000000000..9e49da469 --- /dev/null +++ b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 @@ -0,0 +1 @@ +{"{{ osohm_docker_registry_ops_url }}":{"auth":"{{ osohm_docker_registry_ops_key }}","email":"{{ osohm_docker_registry_ops_email }}"}} diff --git a/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 new file mode 100644 index 000000000..d18ad90fe --- /dev/null +++ b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 @@ -0,0 +1,43 @@ +# This is a systemd file to run this docker container under systemd. +# To make this work: +# * pull the image (probably from ops docker registry) +# * place this file in /etc/systemd/system without the .systemd extension +# * run the commands: +# systemctl daemon-reload +# systemctl enable pcp-docker +# systemctl start pcp-docker +# +# +[Unit] +Description=PCP Collector Contatainer +Requires=docker.service +After=docker.service + + +[Service] +Type=simple +TimeoutStartSec=5m +Environment=HOME=/etc/docker/ops +#Slice=container-small.slice + +# systemd syntax '=-' ignore errors from return codes. +ExecStartPre=-/usr/bin/docker kill "{{ osohm_host_monitoring }}" +ExecStartPre=-/usr/bin/docker rm "{{ osohm_host_monitoring }}" +ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_host_monitoring }}" + + +ExecStart=/usr/bin/docker run --rm --name="{{ osohm_host_monitoring }}" \ + --privileged --net=host --pid=host --ipc=host \ + -v /sys:/sys:ro -v /etc/localtime:/etc/localtime:ro \ + -v /var/lib/docker:/var/lib/docker:ro -v /run:/run \ + -v /var/log:/var/log \ + {{ osohm_docker_registry_url }}{{ osohm_host_monitoring }} + +ExecReload=-/usr/bin/docker stop "{{ osohm_host_monitoring }}" +ExecReload=-/usr/bin/docker rm "{{ osohm_host_monitoring }}" +ExecStop=-/usr/bin/docker stop "{{ osohm_host_monitoring }}" +Restart=always +RestartSec=30 + +[Install] +WantedBy=default.target diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 new file mode 100644 index 000000000..978e40b88 --- /dev/null +++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 @@ -0,0 +1,62 @@ +# This is a systemd file to run this docker container under systemd. +# To make this work: +# * pull the image (probably from ops docker registry) +# * place this file in /etc/systemd/system without the .systemd extension +# * run the commands: +# systemctl daemon-reload +# systemctl enable zagg-client-docker +# systemctl start zagg-client-docker +# +# +[Unit] +Description=Zagg Client Contatainer +Requires=docker.service +After=docker.service + + +[Service] +Type=simple +TimeoutStartSec=5m +Environment=HOME=/etc/docker/ops +#Slice=container-small.slice + +# systemd syntax '=-' ignore errors from return codes. +ExecStartPre=-/usr/bin/docker kill "{{ osohm_zagg_client }}" +ExecStartPre=-/usr/bin/docker rm "{{ osohm_zagg_client }}" +ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_zagg_client }}" + + +ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }} \ + --privileged \ + --pid=host \ + --net=host \ + -e ZAGG_URL={{ osohm_zagg_web_url }} \ + -e ZAGG_USER={{ osohm_default_zagg_server_user }} \ + -e ZAGG_PASSWORD={{ osohm_default_zagg_server_password }} \ + -e ZAGG_CLIENT_HOSTNAME={{ ec2_tag_Name }} \ + -e ZAGG_SSL_VERIFY={{ osohm_zagg_verify_ssl }} \ + -e OSO_CLUSTER_GROUP={{ cluster_group }} \ + -e OSO_CLUSTER_ID={{ oo_clusterid }} \ + -e OSO_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_host-type'] }} \ + -e OSO_SUB_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_sub-host-type'] }} \ + -v /etc/localtime:/etc/localtime \ + -v /run/pcp:/run/pcp \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \ +{% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %} + -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \ + -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \ + -v /etc/openshift/master/master.etcd-client.key:/etc/openshift/master/master.etcd-client.key \ + -v /etc/openshift/master/master-config.yaml:/etc/openshift/master/master-config.yaml \ +{% endif %} + {{ osohm_docker_registry_url }}{{ osohm_zagg_client }} + + +ExecReload=-/usr/bin/docker stop "{{ osohm_zagg_client }}" +ExecReload=-/usr/bin/docker rm "{{ osohm_zagg_client }}" +ExecStop=-/usr/bin/docker stop "{{ osohm_zagg_client }}" +Restart=always +RestartSec=30 + +[Install] +WantedBy=default.target diff --git a/roles/oso_host_monitoring/vars/main.yml b/roles/oso_host_monitoring/vars/main.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/roles/oso_host_monitoring/vars/main.yml @@ -0,0 +1 @@ +--- diff --git a/utils/src/ooinstall/cli_installer.py b/utils/src/ooinstall/cli_installer.py index a19c19c8f..a1632ed0c 100644 --- a/utils/src/ooinstall/cli_installer.py +++ b/utils/src/ooinstall/cli_installer.py @@ -71,7 +71,7 @@ def delete_hosts(hosts): click.echo("\"{}\" doesn't coorespond to any valid input.".format(del_idx)) return hosts, None -def collect_hosts(): +def collect_hosts(master_set=False): """ Collect host information from user. This will later be filled in using ansible. @@ -108,8 +108,10 @@ http://docs.openshift.com/enterprise/latest/architecture/infrastructure_componen value_proc=validate_prompt_hostname) host_props['connect_to'] = hostname_or_ip - - host_props['master'] = click.confirm('Will this host be an OpenShift Master?') + if not master_set: + is_master = click.confirm('Will this host be an OpenShift Master?') + host_props['master'] = is_master + master_set = is_master host_props['node'] = True #TODO: Reenable this option once container installs are out of tech preview @@ -309,7 +311,7 @@ def collect_new_nodes(): Add new nodes here """ click.echo(message) - return collect_hosts() + return collect_hosts(True) def get_installed_hosts(hosts, callback_facts): installed_hosts = [] |