diff options
Diffstat (limited to 'roles')
-rw-r--r-- | roles/nuage_master/templates/nuage-openshift-monitor.j2 | 5 | ||||
-rwxr-xr-x | roles/openshift_facts/library/openshift_facts.py | 15 | ||||
-rw-r--r-- | roles/openshift_hosted_logging/tasks/deploy_logging.yaml | 2 | ||||
-rw-r--r-- | roles/openshift_master/tasks/main.yml | 23 | ||||
-rw-r--r-- | roles/openshift_node/tasks/main.yml | 14 |
5 files changed, 40 insertions, 19 deletions
diff --git a/roles/nuage_master/templates/nuage-openshift-monitor.j2 b/roles/nuage_master/templates/nuage-openshift-monitor.j2 index b2539517b..63117adc0 100644 --- a/roles/nuage_master/templates/nuage-openshift-monitor.j2 +++ b/roles/nuage_master/templates/nuage-openshift-monitor.j2 @@ -37,4 +37,7 @@ etcdClientConfig: certFile: {{ openshift_master_config_dir }}/master.etcd-client.crt keyFile: {{ openshift_master_config_dir }}/master.etcd-client.key urls: - - {{ openshift.master.etcd_urls }} +{% for etcd_url in openshift.master.etcd_urls %} + - {{ etcd_url }} +{% endfor %} + diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 9ffd399bc..0ee018c5c 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -364,12 +364,15 @@ def normalize_openstack_facts(metadata, facts): facts['network']['ip'] = local_ipv4 facts['network']['public_ip'] = metadata['ec2_compat']['public-ipv4'] - # TODO: verify local hostname makes sense and is resolvable - facts['network']['hostname'] = metadata['hostname'] - - # TODO: verify that public hostname makes sense and is resolvable - pub_h = metadata['ec2_compat']['public-hostname'] - facts['network']['public_hostname'] = pub_h + for f_var, h_var, ip_var in [('hostname', 'hostname', 'local-ipv4'), + ('public_hostname', 'public-hostname', 'public-ipv4')]: + try: + if socket.gethostbyname(metadata['ec2_compat'][h_var]) == metadata['ec2_compat'][ip_var]: + facts['network'][f_var] = metadata['ec2_compat'][h_var] + else: + facts['network'][f_var] = metadata['ec2_compat'][ip_var] + except socket.gaierror: + facts['network'][f_var] = metadata['ec2_compat'][ip_var] return facts diff --git a/roles/openshift_hosted_logging/tasks/deploy_logging.yaml b/roles/openshift_hosted_logging/tasks/deploy_logging.yaml index f64c56248..65af1c08e 100644 --- a/roles/openshift_hosted_logging/tasks/deploy_logging.yaml +++ b/roles/openshift_hosted_logging/tasks/deploy_logging.yaml @@ -138,7 +138,7 @@ delay: 5 - name: "Deploy fluentd by labeling the node" - shell: "{{ openshift.common.client_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig label node --overwrite=true {{ '-l openshift_hosted_logging_fluentd_nodeselector' if openshift_hosted_logging_fluentd_nodeselector is defined else '--all' }} {{ openshift_hosted_logging_fluentd_nodeselector_label if openshift_hosted_logging_fluentd_nodeselector_label is defined else 'logging-infra-fluentd=true' }}" + shell: "{{ openshift.common.client_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig label node --overwrite=true {{ '-l' ~ openshift_hosted_logging_fluentd_nodeselector if openshift_hosted_logging_fluentd_nodeselector is defined else '--all' }} {{ openshift_hosted_logging_fluentd_nodeselector_label if openshift_hosted_logging_fluentd_nodeselector_label is defined else 'logging-infra-fluentd=true' }}" - name: "Wait for fluentd to be running" shell: "{{ openshift.common.client_binary }} --config={{ mktemp.stdout }}/admin.kubeconfig get pods -l component=fluentd | grep Running" diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index ce2f96723..1a59717c7 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -168,10 +168,21 @@ - include: set_loopback_context.yml when: openshift.common.version_gte_3_2_or_1_2 +# TODO: Master startup can fail when ec2 transparently reallocates the block +# storage, causing etcd writes to temporarily fail. Retry failures blindly just +# once to allow time for this transient condition to to resolve and for systemd +# to restart the master (which will eventually succeed). +# +# https://github.com/coreos/etcd/issues/3864 +# https://github.com/openshift/origin/issues/6065 +# https://github.com/openshift/origin/issues/6447 - name: Start and enable master service: name={{ openshift.common.service_type }}-master enabled=yes state=started when: not openshift_master_ha | bool register: start_result + until: not start_result | failed + retries: 1 + delay: 60 notify: Verify API Server - name: Check for non-HA master service presence @@ -202,6 +213,9 @@ state: started when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] register: start_result + until: not start_result | failed + retries: 1 + delay: 60 - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" @@ -218,6 +232,9 @@ state: started when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] register: start_result + until: not start_result | failed + retries: 1 + delay: 60 - set_fact: master_api_service_status_changed: "{{ start_result | changed }}" @@ -251,6 +268,9 @@ state: started when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0] register: start_result + until: not start_result | failed + retries: 1 + delay: 60 - pause: seconds: 15 @@ -263,6 +283,9 @@ state: started when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0] register: start_result + until: not start_result | failed + retries: 1 + delay: 60 - set_fact: master_controllers_service_status_changed: "{{ start_result | changed }}" diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index be07bd2d3..8e9c9f511 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -152,17 +152,9 @@ - name: Start and enable node service: name={{ openshift.common.service_type }}-node enabled=yes state=started register: node_start_result - ignore_errors: yes - -- name: Wait 30 seconds for docker initialization whenever node has failed - pause: - seconds: 30 - when: node_start_result | failed - -- name: Start and enable node again - service: name={{ openshift.common.service_type }}-node enabled=yes state=started - register: node_start_result - when: node_start_result | failed + until: not node_start_result | failed + retries: 1 + delay: 30 - set_fact: node_service_status_changed: "{{ node_start_result | changed }}" |