diff options
-rw-r--r-- | playbooks/adhoc/upgrades/files/pre-upgrade-check | 185 | ||||
-rw-r--r-- | playbooks/adhoc/upgrades/upgrade.yml | 61 | ||||
-rw-r--r-- | roles/openshift_master/tasks/main.yml | 5 | ||||
-rw-r--r-- | utils/src/ooinstall/cli_installer.py | 4 |
4 files changed, 251 insertions, 4 deletions
diff --git a/playbooks/adhoc/upgrades/files/pre-upgrade-check b/playbooks/adhoc/upgrades/files/pre-upgrade-check new file mode 100644 index 000000000..c8ecae399 --- /dev/null +++ b/playbooks/adhoc/upgrades/files/pre-upgrade-check @@ -0,0 +1,185 @@ +#!/usr/bin/env python +""" +Pre-upgrade checks that must be run on a master before proceeding with upgrade. +""" +# This is a script not a python module: +# pylint: disable=invalid-name + +# NOTE: This script should not require any python libs other than what is +# in the standard library. + +__license__ = "ASL 2.0" + +import json +import os +import subprocess +import re + +# The maximum length of container.ports.name +ALLOWED_LENGTH = 15 +# The valid structure of container.ports.name +ALLOWED_CHARS = re.compile('^[a-z0-9][a-z0-9\\-]*[a-z0-9]$') +AT_LEAST_ONE_LETTER = re.compile('[a-z]') +# look at OS_PATH for the full path. Default ot 'oc' +OC_PATH = os.getenv('OC_PATH', 'oc') + + +def validate(value): + """ + validate verifies that value matches required conventions + + Rules of container.ports.name validation: + + * must be less that 16 chars + * at least one letter + * only a-z0-9- + * hyphens can not be leading or trailing or next to each other + + :Parameters: + - `value`: Value to validate + """ + if len(value) > ALLOWED_LENGTH: + return False + + if '--' in value: + return False + + # We search since it can be anywhere + if not AT_LEAST_ONE_LETTER.search(value): + return False + + # We match because it must start at the beginning + if not ALLOWED_CHARS.match(value): + return False + return True + + +def list_items(kind): + """ + list_items returns a list of items from the api + + :Parameters: + - `kind`: Kind of item to access + """ + response = subprocess.check_output([OC_PATH, 'get', '--all-namespaces', '-o', 'json', kind]) + items = json.loads(response) + return items.get("items", []) + + +def get(obj, *paths): + """ + Gets an object + + :Parameters: + - `obj`: A dictionary structure + - `path`: All other non-keyword arguments + """ + ret_obj = obj + for path in paths: + if ret_obj.get(path, None) is None: + return [] + ret_obj = ret_obj[path] + return ret_obj + + +# pylint: disable=too-many-arguments +def pretty_print_errors(namespace, kind, item_name, container_name, port_name, valid): + """ + Prints out results in human friendly way. + + :Parameters: + - `namespace`: Namespace of the resource + - `kind`: Kind of the resource + - `item_name`: Name of the resource + - `container_name`: Name of the container. May be "" when kind=Service. + - `port_name`: Name of the port + - `valid`: True if the port is valid + """ + if not valid: + if len(container_name) > 0: + print('%s/%s -n %s (Container="%s" Port="%s")' % ( + kind, item_name, namespace, container_name, port_name)) + else: + print('%s/%s -n %s (Port="%s")' % ( + kind, item_name, namespace, port_name)) + + +def print_validation_header(): + """ + Prints the error header. Should run on the first error to avoid + overwhelming the user. + """ + print """\ +At least one port name does not validate. Valid port names: + + * must be less that 16 chars + * have at least one letter + * only a-z0-9- + * do not start or end with - + * Dashes may not be next to eachother ('--') +""" + + +def main(): + """ + main is the main entry point to this script + """ + try: + # the comma at the end suppresses the newline + print "Checking for oc ...", + subprocess.check_output([OC_PATH, 'whoami']) + print "found" + except: + print( + 'Can not find oc (%s). Override the path with the ' + 'OC_PATH environment variable. Exiting...' % OC_PATH) + raise SystemExit(1) + + # Where the magic happens + first_error = True + for kind, path in [ + ('replicationcontrollers', ("spec", "template", "spec", "containers")), + ('pods', ("spec", "containers")), + ('deploymentconfigs', ("spec", "template", "spec", "containers"))]: + for item in list_items(kind): + namespace = item["metadata"]["namespace"] + item_name = item["metadata"]["name"] + for container in get(item, *path): + container_name = container["name"] + for port in get(container, "ports"): + port_name = port.get("name", None) + if not port_name: + # Unnamed ports are OK + continue + valid = validate(port_name) + if not valid and first_error: + first_error = False + print_validation_header() + pretty_print_errors( + namespace, kind, item_name, + container_name, port_name, valid) + + # Services follow a different flow + for item in list_items('services'): + namespace = item["metadata"]["namespace"] + item_name = item["metadata"]["name"] + for port in get(item, "spec", "ports"): + port_name = port.get("targetPort", None) + if isinstance(port_name, int) or port_name is None: + # Integer only or unnamed ports are OK + continue + valid = validate(port_name) + if not valid and first_error: + first_error = False + print_validation_header() + pretty_print_errors( + namespace, "services", item_name, "", port_name, valid) + + # If we had at least 1 error then exit with 1 + if not first_error: + raise SystemExit(1) + + +if __name__ == '__main__': + main() + diff --git a/playbooks/adhoc/upgrades/upgrade.yml b/playbooks/adhoc/upgrades/upgrade.yml index e63add4d1..dab0195ac 100644 --- a/playbooks/adhoc/upgrades/upgrade.yml +++ b/playbooks/adhoc/upgrades/upgrade.yml @@ -6,12 +6,24 @@ - name: Verify upgrade can proceed hosts: masters[0] + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" gather_facts: no tasks: # Checking the global deployment type rather than host facts, this is about # what the user is requesting. - fail: msg="Deployment type enterprise not supported for upgrade" when: deployment_type == "enterprise" + # Pacemaker is currently the only supported upgrade path for multiple masters + - fail: msg="openshift_master_cluster_method must be set to 'pacemaker'" + when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method != "pacemaker")) + +- name: Run pre-upgrade checks on first master + hosts: masters[0] + tasks: + # If this script errors out ansible will show the default stdout/stderr + # which contains details for the user: + - script: files/pre-upgrade-check - name: Evaluate etcd_hosts hosts: localhost @@ -182,8 +194,6 @@ command: > tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} . - args: - creates: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz" with_items: masters_needing_certs - name: Retrieve the master cert tarball from the master @@ -195,11 +205,11 @@ validate_checksum: yes with_items: masters_needing_certs - - name: Sync certs and restart masters post configuration change hosts: masters vars: sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}" + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" tasks: - name: Unarchive the tarball on the master unarchive: @@ -209,7 +219,41 @@ - name: Restart master services service: name="{{ openshift.common.service_type}}-master" state=restarted + when: not openshift_master_ha | bool + +- name: Destroy cluster + hosts: masters[0] + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" + openshift_deployment_type: "{{ deployment_type }}" + pre_tasks: + - name: Check for configured cluster + stat: + path: /etc/corosync/corosync.conf + register: corosync_conf + when: openshift_master_ha | bool + - name: Destroy cluster + command: pcs cluster destroy --all + when: openshift_master_ha | bool and corosync_conf.stat.exists == true + +- name: Start pcsd on masters + hosts: masters + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" + tasks: + - name: Start pcsd + service: name=pcsd state=started + when: openshift_master_ha | bool +- name: Re-create cluster + hosts: masters[0] + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" + openshift_deployment_type: "{{ deployment_type }}" + omc_cluster_hosts: "{{ groups.masters | join(' ') }}" + roles: + - role: openshift_master_cluster + when: openshift_master_ha | bool - name: Delete temporary directory on localhost hosts: localhost @@ -255,10 +299,21 @@ - name: Restart masters post reconcile hosts: masters + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" tasks: - name: Restart master services service: name="{{ openshift.common.service_type}}-master" state=restarted + when: not openshift_master_ha | bool +- name: Restart cluster post reconcile + hosts: masters[0] + vars: + openshift_master_ha: "{{ groups['masters'] | length > 1 }}" + tasks: + - name: Restart master cluster + command: pcs resource restart master + when: openshift_master_ha | bool - name: Upgrade default router and registry hosts: masters[0] diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index be77fce4a..35570923c 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -140,22 +140,27 @@ src: atomic-openshift-master-api.service.j2 dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service force: no + when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - name: Create the controllers service file template: src: atomic-openshift-master-controllers.service.j2 dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service force: no + when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - name: Create the api env file template: src: atomic-openshift-master-api.j2 dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api force: no + when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - name: Create the controllers env file template: src: atomic-openshift-master-controllers.j2 dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers force: no + when: openshift_master_ha | bool and openshift_master_cluster_method == "native" - command: systemctl daemon-reload + when: openshift_master_ha | bool and openshift_master_cluster_method == "native" # end workaround for missing systemd unit files - name: Create session secrets file diff --git a/utils/src/ooinstall/cli_installer.py b/utils/src/ooinstall/cli_installer.py index 9f0861b77..e63f14816 100644 --- a/utils/src/ooinstall/cli_installer.py +++ b/utils/src/ooinstall/cli_installer.py @@ -323,6 +323,8 @@ def get_installed_hosts(hosts, callback_facts): installed_hosts.append(host) return installed_hosts +# pylint: disable=too-many-branches +# This pylint error will be corrected shortly in separate PR. def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose): # Copy the list of existing hosts so we can remove any already installed nodes. @@ -383,7 +385,7 @@ def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose): openshift_ansible.set_config(oo_cfg) click.echo('Gathering information from hosts...') - callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts) + callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts, verbose) if error: click.echo("There was a problem fetching the required information. " \ "See {} for details.".format(oo_cfg.settings['ansible_log_path'])) |