6 files changed, 516 insertions, 108 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 627fa13eb..326e75c7e 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,21 +1,34 @@
-[provide a description of the issue]
+#### Description
+Please provide a brief description of your issue.
 
 ##### Version
-[if you're operating from a git clone provide the output of `git describe`]
-[if you're running from playbooks installed via RPM or atomic-openshift-utils `rpm -q atomic-openshift-utils openshift-ansible`]
-[Your version of ansible, `ansible --version`]
-
+If you're operating from a git clone provide the output of `git describe`. If
+you're running from playbooks installed via RPM or atomic-openshift-utils, provide
+the output of `rpm -q atomic-openshift-utils openshift-ansible`. Please also provide
+your version of ansible, `ansible --version`. Place the output in the code block below.
+```
+Please place output here
+```
 
 ##### Steps To Reproduce
 1. [step 1]
 2. [step 2]
 
+
 ##### Current Result
+```
+Example command and output or error messages
+```
 
 ##### Expected Result
+```
+Example command and output or error messages
+```
 
 ##### Additional Information
-[The exact command you ran]
-[Your operating system and version, ie: RHEL 7.2, Fedora 23]
-[Your inventory file]
-[visit https://docs.openshift.org/latest/welcome/index.html]
+```
+The exact command you ran
+Your operating system and version, e.g. RHEL 7.2, Fedora 23
+Your inventory file
+Sample code, etc.
+```
diff --git a/inventory/aws/hosts/ec2.ini b/inventory/aws/hosts/ec2.ini
index 5ee51c84f..64c097d47 100644
--- a/inventory/aws/hosts/ec2.ini
+++ b/inventory/aws/hosts/ec2.ini
@@ -29,17 +29,32 @@ regions_exclude = us-gov-west-1,cn-north-1
 # in the event of a collision.
 destination_variable = public_dns_name
 
+# This allows you to override the inventory_name with an ec2 variable, instead
+# of using the destination_variable above. Addressing (aka ansible_ssh_host)
+# will still use destination_variable. Tags should be written as 'tag_TAGNAME'.
+hostname_variable = tag_Name
+
 # For server inside a VPC, using DNS names may not make sense. When an instance
 # has 'subnet_id' set, this variable is used. If the subnet is public, setting
 # this to 'ip_address' will return the public IP address. For instances in a
 # private subnet, this should be set to 'private_ip_address', and Ansible must
 # be run from within EC2. The key of an EC2 tag may optionally be used; however
 # the boto instance variables hold precedence in the event of a collision.
-# WARNING: - instances that are in the private vpc, _without_ public ip address 
+# WARNING: - instances that are in the private vpc, _without_ public ip address
 # will not be listed in the inventory until You set:
-# vpc_destination_variable = 'private_ip_address'
+# vpc_destination_variable = private_ip_address
 vpc_destination_variable = ip_address
 
+# The following two settings allow flexible ansible host naming based on a
+# python format string and a comma-separated list of ec2 tags.  Note that:
+#
+# 1) If the tags referenced are not present for some instances, empty strings
+#    will be substituted in the format string.
+# 2) This overrides both destination_variable and vpc_destination_variable.
+#
+#destination_format = {0}.{1}.example.com
+#destination_format_tags = Name,environment
+
 # To tag instances on EC2 with the resource records that point to them from
 # Route53, uncomment and set 'route53' to True.
 route53 = False
@@ -67,6 +82,9 @@ all_instances = False
 # 'all_rds_instances' to True return all RDS instances regardless of state.
 all_rds_instances = False
 
+# Include RDS cluster information (Aurora etc.)
+include_rds_clusters = False
+
 # By default, only ElastiCache clusters and nodes in the 'available' state
 # are returned. Set 'all_elasticache_clusters' and/or 'all_elastic_nodes'
 # to True return all ElastiCache clusters and nodes, regardless of state.
@@ -91,19 +109,16 @@ cache_path = ~/.ansible/tmp
 # To disable the cache, set this value to 0
 cache_max_age = 300
 
-# These two settings allow flexible ansible host naming based on a format
-# string and a comma-separated list of ec2 tags.  The tags used must be
-# present for all instances, or the code will fail.  This overrides both
-# destination_variable and vpc_destination_variable.
-# destination_format = {0}.{1}.rhcloud.com
-# destination_format_tags = Name,environment
-
 # Organize groups into a nested/hierarchy instead of a flat namespace.
 nested_groups = False
 
 # Replace - tags when creating groups to avoid issues with ansible
 replace_dash_in_groups = False
 
+# If set to true, any tag of the form "a,b,c" is expanded into a list
+# and the results are used to create additional tag_* inventory groups.
+expand_csv_tags = False
+
 # The EC2 inventory output can become very large. To manage its size,
 # configure which groups should be created.
 group_by_instance_id = True
@@ -147,9 +162,28 @@ group_by_elasticache_replication_group = True
 
 # You can use wildcards in filter values also. Below will list instances which
 # tag Name value matches webservers1*
-# (ex. webservers15, webservers1a, webservers123 etc) 
+# (ex. webservers15, webservers1a, webservers123 etc)
 # instance_filters = tag:Name=webservers1*
 
 # A boto configuration profile may be used to separate out credentials
 # see http://boto.readthedocs.org/en/latest/boto_config_tut.html
 # boto_profile = some-boto-profile-name
+
+
+[credentials]
+
+# The AWS credentials can optionally be specified here. Credentials specified
+# here are ignored if the environment variable AWS_ACCESS_KEY_ID or
+# AWS_PROFILE is set, or if the boto_profile property above is set.
+#
+# Supplying AWS credentials here is not recommended, as it introduces
+# non-trivial security concerns. When going down this route, please make sure
+# to set access permissions for this file correctly, e.g. handle it the same
+# way as you would a private SSH key.
+#
+# Unlike the boto and AWS CLI configuration files, this section does not support
+# profiles.
+#
+# aws_access_key_id = AXXXXXXXXXXXXXX
+# aws_secret_access_key = XXXXXXXXXXXXXXXXXXX
+# aws_security_token = XXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/inventory/aws/hosts/ec2.py b/inventory/aws/hosts/ec2.py
index 7dfcd7889..b71458a29 100755
--- a/inventory/aws/hosts/ec2.py
+++ b/inventory/aws/hosts/ec2.py
@@ -38,6 +38,7 @@ When run against a specific host, this script returns the following variables:
  - ec2_attachTime
  - ec2_attachment
  - ec2_attachmentId
+ - ec2_block_devices
  - ec2_client_token
  - ec2_deleteOnTermination
  - ec2_description
@@ -132,6 +133,15 @@ from boto import elasticache
 from boto import route53
 import six
 
+from ansible.module_utils import ec2 as ec2_utils
+
+HAS_BOTO3 = False
+try:
+    import boto3
+    HAS_BOTO3 = True
+except ImportError:
+    pass
+
 from six.moves import configparser
 from collections import defaultdict
 
@@ -142,6 +152,7 @@ except ImportError:
 
 
 class Ec2Inventory(object):
+
     def _empty_inventory(self):
         return {"_meta" : {"hostvars" : {}}}
 
@@ -158,6 +169,9 @@ class Ec2Inventory(object):
         # Boto profile to use (if any)
         self.boto_profile = None
 
+        # AWS credentials.
+        self.credentials = {}
+
         # Read settings and parse CLI arguments
         self.parse_cli_args()
         self.read_settings()
@@ -225,7 +239,7 @@ class Ec2Inventory(object):
         configRegions_exclude = config.get('ec2', 'regions_exclude')
         if (configRegions == 'all'):
             if self.eucalyptus_host:
-                self.regions.append(boto.connect_euca(host=self.eucalyptus_host).region.name)
+                self.regions.append(boto.connect_euca(host=self.eucalyptus_host, **self.credentials).region.name)
             else:
                 for regionInfo in ec2.regions():
                     if regionInfo.name not in configRegions_exclude:
@@ -237,6 +251,11 @@ class Ec2Inventory(object):
         self.destination_variable = config.get('ec2', 'destination_variable')
         self.vpc_destination_variable = config.get('ec2', 'vpc_destination_variable')
 
+        if config.has_option('ec2', 'hostname_variable'):
+            self.hostname_variable = config.get('ec2', 'hostname_variable')
+        else:
+            self.hostname_variable = None
+
         if config.has_option('ec2', 'destination_format') and \
            config.has_option('ec2', 'destination_format_tags'):
             self.destination_format = config.get('ec2', 'destination_format')
@@ -257,6 +276,12 @@ class Ec2Inventory(object):
         if config.has_option('ec2', 'rds'):
             self.rds_enabled = config.getboolean('ec2', 'rds')
 
+        # Include RDS cluster instances?
+        if config.has_option('ec2', 'include_rds_clusters'):
+            self.include_rds_clusters = config.getboolean('ec2', 'include_rds_clusters')
+        else:
+            self.include_rds_clusters = False
+
         # Include ElastiCache instances?
         self.elasticache_enabled = True
         if config.has_option('ec2', 'elasticache'):
@@ -319,6 +344,29 @@ class Ec2Inventory(object):
         if config.has_option('ec2', 'boto_profile') and not self.boto_profile:
             self.boto_profile = config.get('ec2', 'boto_profile')
 
+        # AWS credentials (prefer environment variables)
+        if not (self.boto_profile or os.environ.get('AWS_ACCESS_KEY_ID') or
+                os.environ.get('AWS_PROFILE')):
+            if config.has_option('credentials', 'aws_access_key_id'):
+                aws_access_key_id = config.get('credentials', 'aws_access_key_id')
+            else:
+                aws_access_key_id = None
+            if config.has_option('credentials', 'aws_secret_access_key'):
+                aws_secret_access_key = config.get('credentials', 'aws_secret_access_key')
+            else:
+                aws_secret_access_key = None
+            if config.has_option('credentials', 'aws_security_token'):
+                aws_security_token = config.get('credentials', 'aws_security_token')
+            else:
+                aws_security_token = None
+            if aws_access_key_id:
+                self.credentials = {
+                    'aws_access_key_id': aws_access_key_id,
+                    'aws_secret_access_key': aws_secret_access_key
+                }
+                if aws_security_token:
+                    self.credentials['security_token'] = aws_security_token
+
         # Cache related
         cache_dir = os.path.expanduser(config.get('ec2', 'cache_path'))
         if self.boto_profile:
@@ -326,10 +374,22 @@ class Ec2Inventory(object):
         if not os.path.exists(cache_dir):
             os.makedirs(cache_dir)
 
-        self.cache_path_cache = cache_dir + "/ansible-ec2.cache"
-        self.cache_path_index = cache_dir + "/ansible-ec2.index"
+        cache_name = 'ansible-ec2'
+        aws_profile = lambda: (self.boto_profile or
+                               os.environ.get('AWS_PROFILE') or
+                               os.environ.get('AWS_ACCESS_KEY_ID') or
+                               self.credentials.get('aws_access_key_id', None))
+        if aws_profile():
+            cache_name = '%s-%s' % (cache_name, aws_profile())
+        self.cache_path_cache = cache_dir + "/%s.cache" % cache_name
+        self.cache_path_index = cache_dir + "/%s.index" % cache_name
         self.cache_max_age = config.getint('ec2', 'cache_max_age')
 
+        if config.has_option('ec2', 'expand_csv_tags'):
+            self.expand_csv_tags = config.getboolean('ec2', 'expand_csv_tags')
+        else:
+            self.expand_csv_tags = False
+
         # Configure nested groups instead of flat namespace.
         if config.has_option('ec2', 'nested_groups'):
             self.nested_groups = config.getboolean('ec2', 'nested_groups')
@@ -391,7 +451,10 @@ class Ec2Inventory(object):
         # Instance filters (see boto and EC2 API docs). Ignore invalid filters.
         self.ec2_instance_filters = defaultdict(list)
         if config.has_option('ec2', 'instance_filters'):
-            for instance_filter in config.get('ec2', 'instance_filters', '').split(','):
+
+            filters = [f for f in config.get('ec2', 'instance_filters').split(',') if f]
+
+            for instance_filter in filters:
                 instance_filter = instance_filter.strip()
                 if not instance_filter or '=' not in instance_filter:
                     continue
@@ -410,7 +473,7 @@ class Ec2Inventory(object):
                            help='Get all the variables about a specific instance')
         parser.add_argument('--refresh-cache', action='store_true', default=False,
                            help='Force refresh of cache by making API requests to EC2 (default: False - use cache files)')
-        parser.add_argument('--boto-profile', action='store',
+        parser.add_argument('--profile', '--boto-profile', action='store', dest='boto_profile',
                            help='Use boto profile for connections to EC2')
         self.args = parser.parse_args()
 
@@ -428,6 +491,8 @@ class Ec2Inventory(object):
             if self.elasticache_enabled:
                 self.get_elasticache_clusters_by_region(region)
                 self.get_elasticache_replication_groups_by_region(region)
+            if self.include_rds_clusters:
+                self.include_rds_clusters_by_region(region)
 
         self.write_to_cache(self.inventory, self.cache_path_cache)
         self.write_to_cache(self.index, self.cache_path_index)
@@ -435,7 +500,7 @@ class Ec2Inventory(object):
     def connect(self, region):
         ''' create connection to api server'''
         if self.eucalyptus:
-            conn = boto.connect_euca(host=self.eucalyptus_host)
+            conn = boto.connect_euca(host=self.eucalyptus_host, **self.credentials)
             conn.APIVersion = '2010-08-31'
         else:
             conn = self.connect_to_aws(ec2, region)
@@ -449,7 +514,7 @@ class Ec2Inventory(object):
         return connect_args
 
     def connect_to_aws(self, module, region):
-        connect_args = {}
+        connect_args = self.credentials
 
         # only pass the profile name if it's set (as it is not supported by older boto versions)
         if self.boto_profile:
@@ -475,8 +540,25 @@ class Ec2Inventory(object):
             else:
                 reservations = conn.get_all_instances()
 
+            # Pull the tags back in a second step
+            # AWS are on record as saying that the tags fetched in the first `get_all_instances` request are not
+            # reliable and may be missing, and the only way to guarantee they are there is by calling `get_all_tags`
+            instance_ids = []
+            for reservation in reservations:
+                instance_ids.extend([instance.id for instance in reservation.instances])
+
+            max_filter_value = 199
+            tags = []
+            for i in range(0, len(instance_ids), max_filter_value):
+                tags.extend(conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_ids[i:i+max_filter_value]}))
+
+            tags_by_instance_id = defaultdict(dict)
+            for tag in tags:
+                tags_by_instance_id[tag.res_id][tag.name] = tag.value
+
             for reservation in reservations:
                 for instance in reservation.instances:
+                    instance.tags = tags_by_instance_id[instance.id]
                     self.add_instance(instance, region)
 
         except boto.exception.BotoServerError as e:
@@ -494,9 +576,14 @@ class Ec2Inventory(object):
         try:
             conn = self.connect_to_aws(rds, region)
             if conn:
-                instances = conn.get_all_dbinstances()
-                for instance in instances:
-                    self.add_rds_instance(instance, region)
+                marker = None
+                while True:
+                    instances = conn.get_all_dbinstances(marker=marker)
+                    marker = instances.marker
+                    for instance in instances:
+                        self.add_rds_instance(instance, region)
+                    if not marker:
+                        break
         except boto.exception.BotoServerError as e:
             error = e.reason
 
@@ -506,6 +593,65 @@ class Ec2Inventory(object):
                 error = "Looks like AWS RDS is down:\n%s" % e.message
             self.fail_with_error(error, 'getting RDS instances')
 
+    def include_rds_clusters_by_region(self, region):
+        if not HAS_BOTO3:
+            self.fail_with_error("Working with RDS clusters requires boto3 - please install boto3 and try again",
+                                 "getting RDS clusters")
+
+        client = ec2_utils.boto3_inventory_conn('client', 'rds', region, **self.credentials)
+
+        marker, clusters = '', []
+        while marker is not None:
+            resp = client.describe_db_clusters(Marker=marker)
+            clusters.extend(resp["DBClusters"])
+            marker = resp.get('Marker', None)
+
+        account_id = boto.connect_iam().get_user().arn.split(':')[4]
+        c_dict = {}
+        for c in clusters:
+            # remove these datetime objects as there is no serialisation to json
+            # currently in place and we don't need the data yet
+            if 'EarliestRestorableTime' in c:
+                del c['EarliestRestorableTime']
+            if 'LatestRestorableTime' in c:
+                del c['LatestRestorableTime']
+
+            if self.ec2_instance_filters == {}:
+                matches_filter = True
+            else:
+                matches_filter = False
+
+            try:
+                # arn:aws:rds:<region>:<account number>:<resourcetype>:<name>
+                tags = client.list_tags_for_resource(
+                    ResourceName='arn:aws:rds:' + region + ':' + account_id + ':cluster:' + c['DBClusterIdentifier'])
+                c['Tags'] = tags['TagList']
+
+                if self.ec2_instance_filters:
+                    for filter_key, filter_values in self.ec2_instance_filters.items():
+                        # get AWS tag key e.g. tag:env will be 'env'
+                        tag_name = filter_key.split(":", 1)[1]
+                        # Filter values is a list (if you put multiple values for the same tag name)
+                        matches_filter = any(d['Key'] == tag_name and d['Value'] in filter_values for d in c['Tags'])
+
+                        if matches_filter:
+                            # it matches a filter, so stop looking for further matches
+                            break
+
+            except Exception as e:
+                if e.message.find('DBInstanceNotFound') >= 0:
+                    # AWS RDS bug (2016-01-06) means deletion does not fully complete and leave an 'empty' cluster.
+                    # Ignore errors when trying to find tags for these
+                    pass
+
+            # ignore empty clusters caused by AWS bug
+            if len(c['DBClusterMembers']) == 0:
+                continue
+            elif matches_filter:
+                c_dict[c['DBClusterIdentifier']] = c
+
+        self.inventory['db_clusters'] = c_dict
+
     def get_elasticache_clusters_by_region(self, region):
         ''' Makes an AWS API call to the list of ElastiCache clusters (with
         nodes' info) in a particular region.'''
@@ -514,7 +660,7 @@ class Ec2Inventory(object):
         # that's why we need to call describe directly (it would be called by
         # the shorthand method anyway...)
         try:
-            conn = elasticache.connect_to_region(region)
+            conn = self.connect_to_aws(elasticache, region)
             if conn:
                 # show_cache_node_info = True
                 # because we also want nodes' information
@@ -531,7 +677,7 @@ class Ec2Inventory(object):
 
         try:
             # Boto also doesn't provide wrapper classes to CacheClusters or
-            # CacheNodes. Because of that wo can't make use of the get_list
+            # CacheNodes. Because of that we can't make use of the get_list
             # method in the AWSQueryConnection. Let's do the work manually
             clusters = response['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters']
 
@@ -550,7 +696,7 @@ class Ec2Inventory(object):
         # that's why we need to call describe directly (it would be called by
         # the shorthand method anyway...)
         try:
-            conn = elasticache.connect_to_region(region)
+            conn = self.connect_to_aws(elasticache, region)
             if conn:
                 response = conn.describe_replication_groups()
 
@@ -565,7 +711,7 @@ class Ec2Inventory(object):
 
         try:
             # Boto also doesn't provide wrapper classes to ReplicationGroups
-            # Because of that wo can't make use of the get_list method in the
+            # Because of that we can't make use of the get_list method in the
             # AWSQueryConnection. Let's do the work manually
             replication_groups = response['DescribeReplicationGroupsResponse']['DescribeReplicationGroupsResult']['ReplicationGroups']
 
@@ -619,7 +765,7 @@ class Ec2Inventory(object):
 
         # Select the best destination address
         if self.destination_format and self.destination_format_tags:
-            dest = self.destination_format.format(*[ getattr(instance, 'tags').get(tag, 'nil') for tag in self.destination_format_tags ])
+            dest = self.destination_format.format(*[ getattr(instance, 'tags').get(tag, '') for tag in self.destination_format_tags ])
         elif instance.subnet_id:
             dest = getattr(instance, self.vpc_destination_variable, None)
             if dest is None:
@@ -633,32 +779,46 @@ class Ec2Inventory(object):
             # Skip instances we cannot address (e.g. private VPC subnet)
             return
 
+        # Set the inventory name
+        hostname = None
+        if self.hostname_variable:
+            if self.hostname_variable.startswith('tag_'):
+                hostname = instance.tags.get(self.hostname_variable[4:], None)
+            else:
+                hostname = getattr(instance, self.hostname_variable)
+
+        # If we can't get a nice hostname, use the destination address
+        if not hostname:
+            hostname = dest
+        else:
+            hostname = self.to_safe(hostname).lower()
+
         # if we only want to include hosts that match a pattern, skip those that don't
-        if self.pattern_include and not self.pattern_include.match(dest):
+        if self.pattern_include and not self.pattern_include.match(hostname):
             return
 
         # if we need to exclude hosts that match a pattern, skip those
-        if self.pattern_exclude and self.pattern_exclude.match(dest):
+        if self.pattern_exclude and self.pattern_exclude.match(hostname):
             return
 
         # Add to index
-        self.index[dest] = [region, instance.id]
+        self.index[hostname] = [region, instance.id]
 
         # Inventory: Group by instance ID (always a group of 1)
         if self.group_by_instance_id:
-            self.inventory[instance.id] = [dest]
+            self.inventory[instance.id] = [hostname]
             if self.nested_groups:
                 self.push_group(self.inventory, 'instances', instance.id)
 
         # Inventory: Group by region
         if self.group_by_region:
-            self.push(self.inventory, region, dest)
+            self.push(self.inventory, region, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'regions', region)
 
         # Inventory: Group by availability zone
         if self.group_by_availability_zone:
-            self.push(self.inventory, instance.placement, dest)
+            self.push(self.inventory, instance.placement, hostname)
             if self.nested_groups:
                 if self.group_by_region:
                     self.push_group(self.inventory, region, instance.placement)
@@ -667,28 +827,28 @@ class Ec2Inventory(object):
         # Inventory: Group by Amazon Machine Image (AMI) ID
         if self.group_by_ami_id:
             ami_id = self.to_safe(instance.image_id)
-            self.push(self.inventory, ami_id, dest)
+            self.push(self.inventory, ami_id, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'images', ami_id)
 
         # Inventory: Group by instance type
         if self.group_by_instance_type:
             type_name = self.to_safe('type_' + instance.instance_type)
-            self.push(self.inventory, type_name, dest)
+            self.push(self.inventory, type_name, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'types', type_name)
 
         # Inventory: Group by key pair
         if self.group_by_key_pair and instance.key_name:
             key_name = self.to_safe('key_' + instance.key_name)
-            self.push(self.inventory, key_name, dest)
+            self.push(self.inventory, key_name, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'keys', key_name)
 
         # Inventory: Group by VPC
         if self.group_by_vpc_id and instance.vpc_id:
             vpc_id_name = self.to_safe('vpc_id_' + instance.vpc_id)
-            self.push(self.inventory, vpc_id_name, dest)
+            self.push(self.inventory, vpc_id_name, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'vpcs', vpc_id_name)
 
@@ -697,7 +857,7 @@ class Ec2Inventory(object):
             try:
                 for group in instance.groups:
                     key = self.to_safe("security_group_" + group.name)
-                    self.push(self.inventory, key, dest)
+                    self.push(self.inventory, key, hostname)
                     if self.nested_groups:
                         self.push_group(self.inventory, 'security_groups', key)
             except AttributeError:
@@ -707,34 +867,41 @@ class Ec2Inventory(object):
         # Inventory: Group by tag keys
         if self.group_by_tag_keys:
             for k, v in instance.tags.items():
-                if v:
-                    key = self.to_safe("tag_" + k + "=" + v)
+                if self.expand_csv_tags and v and ',' in v:
+                    values = map(lambda x: x.strip(), v.split(','))
                 else:
-                    key = self.to_safe("tag_" + k)
-                self.push(self.inventory, key, dest)
-                if self.nested_groups:
-                    self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k))
+                    values = [v]
+
+                for v in values:
                     if v:
-                        self.push_group(self.inventory, self.to_safe("tag_" + k), key)
+                        key = self.to_safe("tag_" + k + "=" + v)
+                    else:
+                        key = self.to_safe("tag_" + k)
+                    self.push(self.inventory, key, hostname)
+                    if self.nested_groups:
+                        self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k))
+                        if v:
+                            self.push_group(self.inventory, self.to_safe("tag_" + k), key)
 
         # Inventory: Group by Route53 domain names if enabled
         if self.route53_enabled and self.group_by_route53_names:
             route53_names = self.get_instance_route53_names(instance)
             for name in route53_names:
-                self.push(self.inventory, name, dest)
+                self.push(self.inventory, name, hostname)
                 if self.nested_groups:
                     self.push_group(self.inventory, 'route53', name)
 
         # Global Tag: instances without tags
         if self.group_by_tag_none and len(instance.tags) == 0:
-            self.push(self.inventory, 'tag_none', dest)
+            self.push(self.inventory, 'tag_none', hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'tags', 'tag_none')
 
         # Global Tag: tag all EC2 instances
-        self.push(self.inventory, 'ec2', dest)
+        self.push(self.inventory, 'ec2', hostname)
 
-        self.inventory["_meta"]["hostvars"][dest] = self.get_host_info_dict_from_instance(instance)
+        self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance)
+        self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest
 
 
     def add_rds_instance(self, instance, region):
@@ -752,24 +919,38 @@ class Ec2Inventory(object):
             # Skip instances we cannot address (e.g. private VPC subnet)
             return
 
+        # Set the inventory name
+        hostname = None
+        if self.hostname_variable:
+            if self.hostname_variable.startswith('tag_'):
+                hostname = instance.tags.get(self.hostname_variable[4:], None)
+            else:
+                hostname = getattr(instance, self.hostname_variable)
+
+        # If we can't get a nice hostname, use the destination address
+        if not hostname:
+            hostname = dest
+
+        hostname = self.to_safe(hostname).lower()
+
         # Add to index
-        self.index[dest] = [region, instance.id]
+        self.index[hostname] = [region, instance.id]
 
         # Inventory: Group by instance ID (always a group of 1)
         if self.group_by_instance_id:
-            self.inventory[instance.id] = [dest]
+            self.inventory[instance.id] = [hostname]
             if self.nested_groups:
                 self.push_group(self.inventory, 'instances', instance.id)
 
         # Inventory: Group by region
         if self.group_by_region:
-            self.push(self.inventory, region, dest)
+            self.push(self.inventory, region, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'regions', region)
 
         # Inventory: Group by availability zone
         if self.group_by_availability_zone:
-            self.push(self.inventory, instance.availability_zone, dest)
+            self.push(self.inventory, instance.availability_zone, hostname)
             if self.nested_groups:
                 if self.group_by_region:
                     self.push_group(self.inventory, region, instance.availability_zone)
@@ -778,14 +959,14 @@ class Ec2Inventory(object):
         # Inventory: Group by instance type
         if self.group_by_instance_type:
             type_name = self.to_safe('type_' + instance.instance_class)
-            self.push(self.inventory, type_name, dest)
+            self.push(self.inventory, type_name, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'types', type_name)
 
         # Inventory: Group by VPC
         if self.group_by_vpc_id and instance.subnet_group and instance.subnet_group.vpc_id:
             vpc_id_name = self.to_safe('vpc_id_' + instance.subnet_group.vpc_id)
-            self.push(self.inventory, vpc_id_name, dest)
+            self.push(self.inventory, vpc_id_name, hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'vpcs', vpc_id_name)
 
@@ -794,7 +975,7 @@ class Ec2Inventory(object):
             try:
                 if instance.security_group:
                     key = self.to_safe("security_group_" + instance.security_group.name)
-                    self.push(self.inventory, key, dest)
+                    self.push(self.inventory, key, hostname)
                     if self.nested_groups:
                         self.push_group(self.inventory, 'security_groups', key)
 
@@ -805,20 +986,21 @@ class Ec2Inventory(object):
 
         # Inventory: Group by engine
         if self.group_by_rds_engine:
-            self.push(self.inventory, self.to_safe("rds_" + instance.engine), dest)
+            self.push(self.inventory, self.to_safe("rds_" + instance.engine), hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'rds_engines', self.to_safe("rds_" + instance.engine))
 
         # Inventory: Group by parameter group
         if self.group_by_rds_parameter_group:
-            self.push(self.inventory, self.to_safe("rds_parameter_group_" + instance.parameter_group.name), dest)
+            self.push(self.inventory, self.to_safe("rds_parameter_group_" + instance.parameter_group.name), hostname)
             if self.nested_groups:
                 self.push_group(self.inventory, 'rds_parameter_groups', self.to_safe("rds_parameter_group_" + instance.parameter_group.name))
 
         # Global Tag: all RDS instances
-        self.push(self.inventory, 'rds', dest)
+        self.push(self.inventory, 'rds', hostname)
 
-        self.inventory["_meta"]["hostvars"][dest] = self.get_host_info_dict_from_instance(instance)
+        self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance)
+        self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest
 
     def add_elasticache_cluster(self, cluster, region):
         ''' Adds an ElastiCache cluster to the inventory and index, as long as
@@ -1131,6 +1313,8 @@ class Ec2Inventory(object):
                 instance_vars['ec2_placement'] = value.zone
             elif key == 'ec2_tags':
                 for k, v in value.items():
+                    if self.expand_csv_tags and ',' in v:
+                        v = list(map(lambda x: x.strip(), v.split(',')))
                     key = self.to_safe('ec2_tag_' + k)
                     instance_vars[key] = v
             elif key == 'ec2_groups':
@@ -1141,6 +1325,10 @@ class Ec2Inventory(object):
                     group_names.append(group.name)
                 instance_vars["ec2_security_group_ids"] = ','.join([str(i) for i in group_ids])
                 instance_vars["ec2_security_group_names"] = ','.join([str(i) for i in group_names])
+            elif key == 'ec2_block_device_mapping':
+                instance_vars["ec2_block_devices"] = {}
+                for k, v in value.items():
+                    instance_vars["ec2_block_devices"][ os.path.basename(k) ] = v.volume_id
             else:
                 pass
                 # TODO Product codes if someone finds them useful
@@ -1321,4 +1509,3 @@ class Ec2Inventory(object):
 
 # Run the script
 Ec2Inventory()
-
diff --git a/inventory/gce/hosts/gce.py b/inventory/gce/hosts/gce.py
index cce3c5f35..2be46a58c 100755
--- a/inventory/gce/hosts/gce.py
+++ b/inventory/gce/hosts/gce.py
@@ -70,7 +70,8 @@ Examples:
   $ contrib/inventory/gce.py --host my_instance
 
 Author: Eric Johnson <erjohnso@google.com>
-Version: 0.0.1
+Contributors: Matt Hite <mhite@hotmail.com>, Tom Melendez <supertom@google.com>
+Version: 0.0.3
 '''
 
 __requires__ = ['pycrypto>=2.6']
@@ -84,13 +85,19 @@ except ImportError:
     pass
 
 USER_AGENT_PRODUCT="Ansible-gce_inventory_plugin"
-USER_AGENT_VERSION="v1"
+USER_AGENT_VERSION="v2"
 
 import sys
 import os
 import argparse
+
+from time import time
+
 import ConfigParser
 
+import logging
+logging.getLogger('libcloud.common.google').addHandler(logging.NullHandler())
+
 try:
     import json
 except ImportError:
@@ -101,31 +108,103 @@ try:
     from libcloud.compute.providers import get_driver
     _ = Provider.GCE
 except:
-    print("GCE inventory script requires libcloud >= 0.13")
-    sys.exit(1)
+    sys.exit("GCE inventory script requires libcloud >= 0.13")
+
+
+class CloudInventoryCache(object):
+    def __init__(self, cache_name='ansible-cloud-cache', cache_path='/tmp',
+                 cache_max_age=300):
+        cache_dir = os.path.expanduser(cache_path)
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir)
+        self.cache_path_cache = os.path.join(cache_dir, cache_name)
+
+        self.cache_max_age = cache_max_age
+
+    def is_valid(self, max_age=None):
+        ''' Determines whether the cache file has expired or is still valid '''
+
+        if max_age is None:
+            max_age = self.cache_max_age
+
+        if os.path.isfile(self.cache_path_cache):
+            mod_time = os.path.getmtime(self.cache_path_cache)
+            current_time = time()
+            if (mod_time + max_age) > current_time:
+                return True
+
+        return False
+
+    def get_all_data_from_cache(self, filename=''):
+        ''' Reads the JSON inventory from the cache file. Returns Python dictionary. '''
+
+        data = ''
+        if not filename:
+            filename = self.cache_path_cache
+        with open(filename, 'r') as cache:
+            data = cache.read()
+        return json.loads(data)
+
+    def write_to_cache(self, data, filename=''):
+        ''' Writes data to file as JSON.  Returns True. '''
+        if not filename:
+            filename = self.cache_path_cache
+        json_data = json.dumps(data)
+        with open(filename, 'w') as cache:
+            cache.write(json_data)
+        return True
 
 
 class GceInventory(object):
     def __init__(self):
+        # Cache object
+        self.cache = None
+        # dictionary containing inventory read from disk
+        self.inventory = {}
+
         # Read settings and parse CLI arguments
         self.parse_cli_args()
+        self.config = self.get_config()
         self.driver = self.get_gce_driver()
+        self.ip_type = self.get_inventory_options()
+        if self.ip_type:
+            self.ip_type = self.ip_type.lower()
+
+        # Cache management
+        start_inventory_time = time()
+        cache_used = False
+        if self.args.refresh_cache or not self.cache.is_valid():
+            self.do_api_calls_update_cache()
+        else:
+            self.load_inventory_from_cache()
+            cache_used = True
+            self.inventory['_meta']['stats'] = {'use_cache': True}
+        self.inventory['_meta']['stats'] = {
+            'inventory_load_time': time() - start_inventory_time,
+            'cache_used': cache_used
+        }
 
         # Just display data for specific host
         if self.args.host:
-            print(self.json_format_dict(self.node_to_dict(
-                    self.get_instance(self.args.host)),
-                    pretty=self.args.pretty))
-            sys.exit(0)
-
-        # Otherwise, assume user wants all instances grouped
-        print(self.json_format_dict(self.group_instances(),
-            pretty=self.args.pretty))
+            print(self.json_format_dict(
+                self.inventory['_meta']['hostvars'][self.args.host],
+                pretty=self.args.pretty))
+        else:
+            # Otherwise, assume user wants all instances grouped
+            zones = self.parse_env_zones()
+            print(self.json_format_dict(self.inventory,
+                                        pretty=self.args.pretty))
         sys.exit(0)
 
-    def get_gce_driver(self):
-        """Determine the GCE authorization settings and return a
-        libcloud driver.
+    def get_config(self):
+        """
+        Reads the settings from the gce.ini file.
+
+        Populates a SafeConfigParser object with defaults and
+        attempts to read an .ini-style configuration from the filename
+        specified in GCE_INI_PATH. If the environment variable is
+        not present, the filename defaults to gce.ini in the directory
+        that contains this script.
         """
         gce_ini_default_path = os.path.join(
             os.path.dirname(os.path.realpath(__file__)), "gce.ini")
@@ -140,14 +219,57 @@ class GceInventory(object):
             'gce_service_account_pem_file_path': '',
             'gce_project_id': '',
             'libcloud_secrets': '',
+            'inventory_ip_type': '',
+            'cache_path': '~/.ansible/tmp',
+            'cache_max_age': '300'
         })
         if 'gce' not in config.sections():
             config.add_section('gce')
+        if 'inventory' not in config.sections():
+            config.add_section('inventory')
+        if 'cache' not in config.sections():
+            config.add_section('cache')
+
         config.read(gce_ini_path)
 
+        #########
+        # Section added for processing ini settings
+        #########
+
+        # Set the instance_states filter based on config file options
+        self.instance_states = []
+        if config.has_option('gce', 'instance_states'):
+            states = config.get('gce', 'instance_states')
+            # Ignore if instance_states is an empty string.
+            if states:
+                self.instance_states = states.split(',')
+
+        # Caching
+        cache_path = config.get('cache', 'cache_path')
+        cache_max_age = config.getint('cache', 'cache_max_age')
+        # TODO(supertom): support project-specific caches
+        cache_name = 'ansible-gce.cache'
+        self.cache = CloudInventoryCache(cache_path=cache_path,
+                                         cache_max_age=cache_max_age,
+                                         cache_name=cache_name)
+        return config
+
+    def get_inventory_options(self):
+        """Determine inventory options. Environment variables always
+        take precedence over configuration files."""
+        ip_type = self.config.get('inventory', 'inventory_ip_type')
+        # If the appropriate environment variables are set, they override
+        # other configuration
+        ip_type = os.environ.get('INVENTORY_IP_TYPE', ip_type)
+        return ip_type
+
+    def get_gce_driver(self):
+        """Determine the GCE authorization settings and return a
+        libcloud driver.
+        """
         # Attempt to get GCE params from a configuration file, if one
         # exists.
-        secrets_path = config.get('gce', 'libcloud_secrets')
+        secrets_path = self.config.get('gce', 'libcloud_secrets')
         secrets_found = False
         try:
             import secrets
@@ -161,8 +283,7 @@ class GceInventory(object):
             if not secrets_path.endswith('secrets.py'):
                 err = "Must specify libcloud secrets file as "
                 err += "/absolute/path/to/secrets.py"
-                print(err)
-                sys.exit(1)
+                sys.exit(err)
             sys.path.append(os.path.dirname(secrets_path))
             try:
                 import secrets
@@ -173,10 +294,10 @@ class GceInventory(object):
                 pass
         if not secrets_found:
             args = [
-                config.get('gce','gce_service_account_email_address'),
-                config.get('gce','gce_service_account_pem_file_path')
+                self.config.get('gce','gce_service_account_email_address'),
+                self.config.get('gce','gce_service_account_pem_file_path')
             ]
-            kwargs = {'project': config.get('gce', 'gce_project_id')}
+            kwargs = {'project': self.config.get('gce', 'gce_project_id')}
 
         # If the appropriate environment variables are set, they override
         # other configuration; process those into our args and kwargs.
@@ -191,6 +312,14 @@ class GceInventory(object):
         )
         return gce
 
+    def parse_env_zones(self):
+        '''Returns a list of zones parsed from the comma-separated GCE_ZONE environment variable.
+        If provided, this will be used to filter the results of the group_instances call'''
+        import csv
+        reader = csv.reader([os.environ.get('GCE_ZONE',"")], skipinitialspace=True)
+        zones = [r for r in reader]
+        return [z for z in zones[0]]
+
     def parse_cli_args(self):
         ''' Command line argument processing '''
 
@@ -202,6 +331,9 @@ class GceInventory(object):
                            help='Get all information about an instance')
         parser.add_argument('--pretty', action='store_true', default=False,
                            help='Pretty format (default: False)')
+        parser.add_argument(
+            '--refresh-cache', action='store_true', default=False,
+            help='Force refresh of cache by making API requests (default: False - use cache files)')
         self.args = parser.parse_args()
 
 
@@ -211,11 +343,17 @@ class GceInventory(object):
         if inst is None:
             return {}
 
-        if inst.extra['metadata'].has_key('items'):
+        if 'items' in inst.extra['metadata']:
             for entry in inst.extra['metadata']['items']:
                 md[entry['key']] = entry['value']
 
         net = inst.extra['networkInterfaces'][0]['network'].split('/')[-1]
+        # default to external IP unless user has specified they prefer internal
+        if self.ip_type == 'internal':
+            ssh_host = inst.private_ips[0]
+        else:
+            ssh_host = inst.public_ips[0] if len(inst.public_ips) >= 1 else inst.private_ips[0]
+
         return {
             'gce_uuid': inst.uuid,
             'gce_id': inst.id,
@@ -231,29 +369,67 @@ class GceInventory(object):
             'gce_metadata': md,
             'gce_network': net,
             # Hosts don't have a public name, so we add an IP
-            'ansible_ssh_host': inst.public_ips[0] if len(inst.public_ips) >= 1 else inst.private_ips[0]
+            'ansible_ssh_host': ssh_host
         }
 
-    def get_instance(self, instance_name):
-        '''Gets details about a specific instance '''
+    def load_inventory_from_cache(self):
+        ''' Loads inventory from JSON on disk. '''
+
         try:
-            return self.driver.ex_get_node(instance_name)
+            self.inventory = self.cache.get_all_data_from_cache()
+            hosts = self.inventory['_meta']['hostvars']
         except Exception as e:
-            return None
-
-    def group_instances(self):
+            print(
+                "Invalid inventory file %s.  Please rebuild with -refresh-cache option."
+                % (self.cache.cache_path_cache))
+            raise
+
+    def do_api_calls_update_cache(self):
+        ''' Do API calls and save data in cache. '''
+        zones = self.parse_env_zones()
+        data = self.group_instances(zones)
+        self.cache.write_to_cache(data)
+        self.inventory = data
+
+    def list_nodes(self):
+        all_nodes = []
+        params, more_results = {'maxResults': 500}, True
+        while more_results:
+            self.driver.connection.gce_params=params
+            all_nodes.extend(self.driver.list_nodes())
+            more_results = 'pageToken' in params
+        return all_nodes
+
+    def group_instances(self, zones=None):
         '''Group all instances'''
         groups = {}
         meta = {}
         meta["hostvars"] = {}
 
-        for node in self.driver.list_nodes():
+        for node in self.list_nodes():
+
+            # This check filters on the desired instance states defined in the
+            # config file with the instance_states config option.
+            #
+            # If the instance_states list is _empty_ then _ALL_ states are returned.
+            #
+            # If the instance_states list is _populated_ then check the current
+            # state against the instance_states list
+            if self.instance_states and not node.extra['status'] in self.instance_states:
+                continue
+
             name = node.name
 
             meta["hostvars"][name] = self.node_to_dict(node)
 
             zone = node.extra['zone'].name
-            if groups.has_key(zone): groups[zone].append(name)
+
+            # To avoid making multiple requests per zone
+            # we list all nodes and then filter the results
+            if zones and zone not in zones:
+                continue
+
+            if zone in groups: groups[zone].append(name)
             else: groups[zone] = [name]
 
             tags = node.extra['tags']
@@ -262,25 +438,25 @@ class GceInventory(object):
                     tag = t[6:]
                 else:
                     tag = 'tag_%s' % t
-                if groups.has_key(tag): groups[tag].append(name)
+                if tag in groups: groups[tag].append(name)
                 else: groups[tag] = [name]
 
             net = node.extra['networkInterfaces'][0]['network'].split('/')[-1]
             net = 'network_%s' % net
-            if groups.has_key(net): groups[net].append(name)
+            if net in groups: groups[net].append(name)
             else: groups[net] = [name]
 
             machine_type = node.size
-            if groups.has_key(machine_type): groups[machine_type].append(name)
+            if machine_type in groups: groups[machine_type].append(name)
             else: groups[machine_type] = [name]
 
             image = node.image and node.image or 'persistent_disk'
-            if groups.has_key(image): groups[image].append(name)
+            if image in groups: groups[image].append(name)
             else: groups[image] = [name]
 
             status = node.extra['status']
             stat = 'status_%s' % status.lower()
-            if groups.has_key(stat): groups[stat].append(name)
+            if stat in groups: groups[stat].append(name)
             else: groups[stat] = [name]
 
         groups["_meta"] = meta
@@ -296,6 +472,6 @@ class GceInventory(object):
         else:
             return json.dumps(data)
 
-
 # Run the script
-GceInventory()
+if __name__ == '__main__':
+    GceInventory()
diff --git a/utils/test/openshift_ansible_tests.py b/utils/test/openshift_ansible_tests.py
index fe3029ca1..5847fe37b 100644
--- a/utils/test/openshift_ansible_tests.py
+++ b/utils/test/openshift_ansible_tests.py
@@ -58,7 +58,6 @@ class TestOpenShiftAnsible(unittest.TestCase):
         self.assertTrue(inventory.has_option('new_nodes', 'new_node1'))
 
     def test_write_inventory_vars_role_vars(self):
-        print(yaml.dump(openshift_ansible.CFG.deployment.roles))
         with open(self.inventory, 'w') as inv:
             openshift_ansible.CFG.deployment.roles['master'].variables={'color': 'blue'}
             openshift_ansible.CFG.deployment.roles['node'].variables={'color': 'green'}
diff --git a/utils/test/test_utils.py b/utils/test/test_utils.py
index b18f85692..cbce64f7e 100644
--- a/utils/test/test_utils.py
+++ b/utils/test/test_utils.py
@@ -38,7 +38,6 @@ class TestUtils(unittest.TestCase):
 
         with mock.patch('ooinstall.utils.installer_log') as _il:
             debug_env(self.debug_all_params)
-            print(_il.debug.call_args_list)
 
             # Debug was called for each item we expect
             self.assertEqual(