-rw-r--r-- | filter_plugins/openshift_master.py | 28
l--------- | playbooks/byo/openshift-master/filter_plugins | 1
l--------- | playbooks/byo/openshift-master/lookup_plugins | 1
-rw-r--r-- | playbooks/byo/openshift-master/restart.yml | 4
l--------- | playbooks/byo/openshift-master/roles | 1
-rw-r--r-- | playbooks/common/openshift-master/restart.yml | 141
-rw-r--r-- | playbooks/common/openshift-master/restart_hosts.yml | 39
-rw-r--r-- | playbooks/common/openshift-master/restart_hosts_pacemaker.yml | 25
-rw-r--r-- | playbooks/common/openshift-master/restart_services.yml | 27
-rw-r--r-- | playbooks/common/openshift-master/restart_services_pacemaker.yml | 10
-rw-r--r-- | roles/openshift_facts/tasks/main.yml | 1
-rw-r--r-- | roles/os_zabbix/tasks/main.yml | 4
-rw-r--r-- | utils/src/ooinstall/cli_installer.py | 13
-rw-r--r-- | utils/src/ooinstall/oo_config.py | 4
-rw-r--r-- | utils/src/ooinstall/openshift_ansible.py | 13
-rw-r--r-- | utils/test/cli_installer_tests.py | 42
-rw-r--r-- | utils/test/fixture.py | 16
17 files changed, 333 insertions, 37 deletions
diff --git a/filter_plugins/openshift_master.py b/filter_plugins/openshift_master.py
index 8d7c62ad1..7ababc51a 100644
--- a/filter_plugins/openshift_master.py
+++ b/filter_plugins/openshift_master.py
@@ -463,6 +463,32 @@ class FilterModule(object):
         IdentityProviderBase.validate_idp_list(idp_list)
         return yaml.safe_dump([idp.to_dict() for idp in idp_list], default_flow_style=False)
 
+    @staticmethod
+    def validate_pcs_cluster(data, masters=None):
+        ''' Validates output from "pcs status", ensuring that each master
+            provided is online.
+            Ex: data = ('...',
+                        'PCSD Status:',
+                        'master1.example.com: Online',
+                        'master2.example.com: Online',
+                        'master3.example.com: Online',
+                        '...')
+                masters = ['master1.example.com',
+                           'master2.example.com',
+                           'master3.example.com']
+                returns True
+        '''
+        if not issubclass(type(data), str):
+            raise errors.AnsibleFilterError("|failed expects data is a string")
+        if not issubclass(type(masters), list):
+            raise errors.AnsibleFilterError("|failed expects masters is a list")
+        valid = True
+        for master in masters:
+            if "{0}: Online".format(master) not in data:
+                valid = False
+        return valid
+
     def filters(self):
         ''' returns a mapping of filters to methods '''
-        return {"translate_idps": self.translate_idps}
+        return {"translate_idps": self.translate_idps,
+                "validate_pcs_cluster": self.validate_pcs_cluster}
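The new filter simply checks that every requested master appears as "<host>: Online" somewhere in the raw pcs output. A minimal standalone sketch of that contract, with the loop above condensed to all() and the sample "pcs status" text invented for the example:

# Condensed reimplementation of validate_pcs_cluster, for illustration
# only; the sample pcs output and hostnames below are invented.
def validate_pcs_cluster(data, masters=None):
    return all("{0}: Online".format(master) in data for master in masters)

pcs_output = "\n".join([
    "PCSD Status:",
    "  master1.example.com: Online",
    "  master2.example.com: Online",
    "  master3.example.com: Offline",
])

print(validate_pcs_cluster(pcs_output, ["master1.example.com",
                                        "master2.example.com"]))  # True
print(validate_pcs_cluster(pcs_output, ["master3.example.com"]))  # False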
diff --git a/playbooks/byo/openshift-master/filter_plugins b/playbooks/byo/openshift-master/filter_plugins
new file mode 120000
index 000000000..99a95e4ca
--- /dev/null
+++ b/playbooks/byo/openshift-master/filter_plugins
@@ -0,0 +1 @@
+../../../filter_plugins
\ No newline at end of file
diff --git a/playbooks/byo/openshift-master/lookup_plugins b/playbooks/byo/openshift-master/lookup_plugins
new file mode 120000
index 000000000..ac79701db
--- /dev/null
+++ b/playbooks/byo/openshift-master/lookup_plugins
@@ -0,0 +1 @@
+../../../lookup_plugins
\ No newline at end of file
diff --git a/playbooks/byo/openshift-master/restart.yml b/playbooks/byo/openshift-master/restart.yml
new file mode 100644
index 000000000..a78a6aa3d
--- /dev/null
+++ b/playbooks/byo/openshift-master/restart.yml
@@ -0,0 +1,4 @@
+---
+- include: ../../common/openshift-master/restart.yml
+  vars_files:
+  - ../../byo/openshift-cluster/cluster_hosts.yml
diff --git a/playbooks/byo/openshift-master/roles b/playbooks/byo/openshift-master/roles
new file mode 120000
index 000000000..20c4c58cf
--- /dev/null
+++ b/playbooks/byo/openshift-master/roles
@@ -0,0 +1 @@
+../../../roles
\ No newline at end of file
diff --git a/playbooks/common/openshift-master/restart.yml b/playbooks/common/openshift-master/restart.yml
new file mode 100644
index 000000000..fa13a64cb
--- /dev/null
+++ b/playbooks/common/openshift-master/restart.yml
@@ -0,0 +1,141 @@
+---
+- include: ../openshift-cluster/evaluate_groups.yml
+
+- name: Validate configuration for rolling restart
+  hosts: oo_masters_to_config
+  roles:
+  - openshift_facts
+  tasks:
+  - fail:
+      msg: "openshift_rolling_restart_mode must be set to either 'services' or 'system'"
+    when: openshift_rolling_restart_mode is defined and openshift_rolling_restart_mode not in ["services", "system"]
+  - openshift_facts:
+      role: "{{ item.role }}"
+      local_facts: "{{ item.local_facts }}"
+    with_items:
+    - role: common
+      local_facts:
+        rolling_restart_mode: "{{ openshift_rolling_restart_mode | default('services') }}"
+    - role: master
+      local_facts:
+        cluster_method: "{{ openshift_master_cluster_method | default(None) }}"
+
+# Create a temp file on localhost, then check each system that will be
+# rebooted for that file. If the file exists, we know we're running
+# ansible on a machine that needs a reboot, and we warn the user.
+- name: Create temp file on localhost
+  hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tasks:
+  - local_action: command mktemp
+    register: mktemp
+    changed_when: false
+
+- name: Check if temp file exists on any masters
+  hosts: oo_masters_to_config
+  tasks:
+  - stat: path="{{ hostvars.localhost.mktemp.stdout }}"
+    register: exists
+    changed_when: false
+
+- name: Cleanup temp file on localhost
+  hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tasks:
+  - file: path="{{ hostvars.localhost.mktemp.stdout }}" state=absent
+    changed_when: false
+
+- name: Warn if restarting the system where ansible is running
+  hosts: oo_masters_to_config
+  tasks:
+  - pause:
+      prompt: >
+        Warning: Running playbook from a host that will be restarted!
+        Press CTRL+C and A to abort playbook execution. You may
+        continue by pressing ENTER but the playbook will stop
+        executing once this system restarts and services must be
+        manually verified.
+    when: exists.stat.exists and openshift.common.rolling_restart_mode == 'system'
+  - set_fact:
+      current_host: "{{ exists.stat.exists }}"
+    when: openshift.common.rolling_restart_mode == 'system'
+
+- name: Determine which masters are currently active
+  hosts: oo_masters_to_config
+  tasks:
+  - name: Check master service status
+    command: >
+      systemctl is-active {{ openshift.common.service_type }}-master
+    register: active_check_output
+    when: openshift.master.cluster_method == 'pacemaker'
+    failed_when: active_check_output.stdout not in ['active', 'inactive']
+    changed_when: false
+  - set_fact:
+      is_active: "{{ active_check_output.stdout == 'active' }}"
+    when: openshift.master.cluster_method == 'pacemaker'
+
+- name: Evaluate master groups
+  hosts: localhost
+  become: no
+  tasks:
+  - name: Evaluate oo_active_masters
+    add_host:
+      name: "{{ item }}"
+      groups: oo_active_masters
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_sudo: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups.oo_masters_to_config | default([]) }}"
+    when: (hostvars[item]['is_active'] | default(false)) | bool
+  - name: Evaluate oo_current_masters
+    add_host:
+      name: "{{ item }}"
+      groups: oo_current_masters
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_sudo: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups.oo_masters_to_config | default([]) }}"
+    when: (hostvars[item]['current_host'] | default(false)) | bool
+
+- name: Validate pacemaker cluster
+  hosts: oo_active_masters
+  tasks:
+  - name: Retrieve pcs status
+    command: pcs status
+    register: pcs_status_output
+    changed_when: false
+  - fail:
+      msg: >
+        Pacemaker cluster validation failed. One or more nodes are not
+        online.
+    when: not (pcs_status_output.stdout | validate_pcs_cluster(groups.oo_masters_to_config)) | bool
+
+- name: Restart masters
+  hosts: oo_masters_to_config:!oo_active_masters:!oo_current_masters
+  vars:
+    openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
+  serial: 1
+  tasks:
+  - include: restart_hosts.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services.yml
+    when: openshift.common.rolling_restart_mode == 'services'
+
+- name: Restart active masters
+  hosts: oo_active_masters
+  serial: 1
+  tasks:
+  - include: restart_hosts_pacemaker.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services_pacemaker.yml
+    when: openshift.common.rolling_restart_mode == 'services'
+
+- name: Restart current masters
+  hosts: oo_current_masters
+  serial: 1
+  tasks:
+  - include: restart_hosts.yml
+    when: openshift.common.rolling_restart_mode == 'system'
+  - include: restart_services.yml
+    when: openshift.common.rolling_restart_mode == 'services'
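The last three plays restart masters in three batches, one host at a time: passive masters first, then the active pacemaker master, and finally the host Ansible itself is running on. A rough Python sketch of that ordering, with hostnames and facts invented (is_active and current_host mirror the facts set earlier in the playbook):

# Invented facts mirroring is_active/current_host from the plays above.
masters = {
    'master1.example.com': {'is_active': False, 'current_host': True},
    'master2.example.com': {'is_active': True,  'current_host': False},
    'master3.example.com': {'is_active': False, 'current_host': False},
}

active = [h for h, f in masters.items() if f['is_active']]
current = [h for h, f in masters.items() if f['current_host']]
rest = [h for h in masters if h not in active and h not in current]

# serial: 1 restarts one host per batch; least disruptive hosts first.
for host in rest + active + current:
    print("restarting", host)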
diff --git a/playbooks/common/openshift-master/restart_hosts.yml b/playbooks/common/openshift-master/restart_hosts.yml
new file mode 100644
index 000000000..ff206f5a2
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_hosts.yml
@@ -0,0 +1,39 @@
+- name: Restart master system
+  # https://github.com/ansible/ansible/issues/10616
+  shell: sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart"
+  async: 1
+  poll: 0
+  ignore_errors: true
+  become: yes
+# When cluster_method != pacemaker we can ensure the api_port is
+# available.
+- name: Wait for master API to come back online
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ inventory_hostname }}"
+    state=started
+    delay=10
+    port="{{ openshift.master.api_port }}"
+  when: openshift.master.cluster_method != 'pacemaker'
+- name: Wait for master to start
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ inventory_hostname }}"
+    state=started
+    delay=10
+    port=22
+  when: openshift.master.cluster_method == 'pacemaker'
+- name: Wait for master to become available
+  command: pcs status
+  register: pcs_status_output
+  until: pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname]) | bool
+  retries: 15
+  delay: 2
+  changed_when: false
+  when: openshift.master.cluster_method == 'pacemaker'
+- fail:
+    msg: >
+      Pacemaker cluster validation failed, {{ inventory_hostname }} is not online.
+  when: openshift.master.cluster_method == 'pacemaker' and not (pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname])) | bool
diff --git a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml
new file mode 100644
index 000000000..c9219e8de
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml
@@ -0,0 +1,25 @@
+- name: Fail over master resource
+  command: >
+    pcs resource move master {{ hostvars | oo_select_keys(groups['oo_masters_to_config']) | oo_collect('openshift.common.hostname', {'is_active': 'False'}) | list | first }}
+- name: Wait for master API to come back online
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ openshift.master.cluster_hostname }}"
+    state=started
+    delay=10
+    port="{{ openshift.master.api_port }}"
+- name: Restart master system
+  # https://github.com/ansible/ansible/issues/10616
+  shell: sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart"
+  async: 1
+  poll: 0
+  ignore_errors: true
+  become: yes
+- name: Wait for master to start
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ inventory_hostname }}"
+    state=started
+    delay=10
diff --git a/playbooks/common/openshift-master/restart_services.yml b/playbooks/common/openshift-master/restart_services.yml
new file mode 100644
index 000000000..5e539cd65
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_services.yml
@@ -0,0 +1,27 @@
+- name: Restart master
+  service:
+    name: "{{ openshift.common.service_type }}-master"
+    state: restarted
+  when: not openshift_master_ha | bool
+- name: Restart master API
+  service:
+    name: "{{ openshift.common.service_type }}-master-api"
+    state: restarted
+  when: openshift_master_ha | bool and openshift.master.cluster_method != 'pacemaker'
+- name: Wait for master API to come back online
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ inventory_hostname }}"
+    state=started
+    delay=10
+    port="{{ openshift.master.api_port }}"
+  when: openshift_master_ha | bool and openshift.master.cluster_method != 'pacemaker'
+- name: Restart master controllers
+  service:
+    name: "{{ openshift.common.service_type }}-master-controllers"
+    state: restarted
+  # Ignore errors since it is possible that type != simple for
+  # pre-3.1.1 installations.
+  ignore_errors: true
+  when: openshift_master_ha | bool and openshift.master.cluster_method != 'pacemaker'
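In restart_hosts_pacemaker.yml above, the failover target is computed by the oo_select_keys/oo_collect filter chain; loosely, it picks the first configured master whose is_active fact is false. A rough sketch of that selection under invented hostvars (the real filters operate on nested Ansible facts and may differ in detail):

# Invented hostvars; pick the first non-active master as the target of
# "pcs resource move master <host>".
hostvars = {
    'master1.example.com': {'hostname': 'master1.example.com', 'is_active': True},
    'master2.example.com': {'hostname': 'master2.example.com', 'is_active': False},
}

target = [v['hostname'] for v in hostvars.values() if not v['is_active']][0]
print("pcs resource move master", target)  # master2.example.com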
diff --git a/playbooks/common/openshift-master/restart_services_pacemaker.yml b/playbooks/common/openshift-master/restart_services_pacemaker.yml
new file mode 100644
index 000000000..e738f3fb6
--- /dev/null
+++ b/playbooks/common/openshift-master/restart_services_pacemaker.yml
@@ -0,0 +1,10 @@
+- name: Restart master services
+  command: pcs resource restart master
+- name: Wait for master API to come back online
+  become: no
+  local_action:
+    module: wait_for
+    host="{{ openshift.master.cluster_hostname }}"
+    state=started
+    delay=10
+    port="{{ openshift.master.api_port }}"
diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml
index 87fa99a3b..e40a1b329 100644
--- a/roles/openshift_facts/tasks/main.yml
+++ b/roles/openshift_facts/tasks/main.yml
@@ -10,6 +10,7 @@
   shell: ls /run/ostree-booted
   ignore_errors: yes
   failed_when: false
+  changed_when: false
  register: ostree_output
 
 # Locally setup containerized facts for now
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index a8b65dd56..7552086d4 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -1,8 +1,4 @@
 ---
-- fail:
-    msg: "Zabbix config is not yet supported on atomic hosts"
-  when: openshift.common.is_containerized | bool
-
 - name: Main List all templates
   zbx_template:
     zbx_server: "{{ ozb_server }}"
diff --git a/utils/src/ooinstall/cli_installer.py b/utils/src/ooinstall/cli_installer.py
index 3c6de44cf..4e30929da 100644
--- a/utils/src/ooinstall/cli_installer.py
+++ b/utils/src/ooinstall/cli_installer.py
@@ -33,9 +33,7 @@ def is_valid_hostname(hostname):
 def validate_prompt_hostname(hostname):
     if '' == hostname or is_valid_hostname(hostname):
         return hostname
-    raise click.BadParameter('"{}" appears to be an invalid hostname. ' \
-                             'Please double-check this value i' \
-                             'and re-enter it.'.format(hostname))
+    raise click.BadParameter('Invalid hostname. Please double-check this value and re-enter it.')
 
 def get_ansible_ssh_user():
     click.clear()
@@ -72,7 +70,7 @@ def delete_hosts(hosts):
             click.echo("\"{}\" doesn't coorespond to any valid input.".format(del_idx))
     return hosts, None
 
-def collect_hosts(oo_cfg, masters_set=False, print_summary=True):
+def collect_hosts(oo_cfg, existing_env=False, masters_set=False, print_summary=True):
     """
         Collect host information from user. This will later be filled in using
        ansible.
@@ -138,6 +136,11 @@ http://docs.openshift.com/enterprise/latest/architecture/infrastructure_componen
         else:
             host_props['containerized'] = False
 
+        if existing_env:
+            host_props['new_host'] = True
+        else:
+            host_props['new_host'] = False
+
         host = Host(**host_props)
 
         hosts.append(host)
@@ -506,7 +509,7 @@ def collect_new_nodes(oo_cfg):
     Add new nodes here
     """
     click.echo(message)
-    return collect_hosts(oo_cfg, masters_set=True, print_summary=False)
+    return collect_hosts(oo_cfg, existing_env=True, masters_set=True, print_summary=False)
 
 def get_installed_hosts(hosts, callback_facts):
     installed_hosts = []
diff --git a/utils/src/ooinstall/oo_config.py b/utils/src/ooinstall/oo_config.py
index 031b82bc1..33ab27567 100644
--- a/utils/src/ooinstall/oo_config.py
+++ b/utils/src/ooinstall/oo_config.py
@@ -38,6 +38,7 @@ class Host(object):
         self.public_hostname = kwargs.get('public_hostname', None)
         self.connect_to = kwargs.get('connect_to', None)
         self.preconfigured = kwargs.get('preconfigured', None)
+        self.new_host = kwargs.get('new_host', None)
 
         # Should this host run as an OpenShift master:
         self.master = kwargs.get('master', False)
@@ -68,7 +69,8 @@ class Host(object):
         """ Used when exporting to yaml. """
         d = {}
         for prop in ['ip', 'hostname', 'public_ip', 'public_hostname',
-                     'master', 'node', 'master_lb', 'containerized', 'connect_to', 'preconfigured']:
+                     'master', 'node', 'master_lb', 'containerized',
+                     'connect_to', 'preconfigured', 'new_host']:
             # If the property is defined (not None or False), export it:
             if getattr(self, prop):
                 d[prop] = getattr(self, prop)
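A consequence of the oo_config.py change: new_host now round-trips through the YAML export, but only when truthy, since to_dict() skips properties that are None or False. A small sketch assuming the Host class above (host values invented):

# Assuming the Host class above; connect_to/node values are invented.
host = Host(connect_to='10.0.0.3', node=True, new_host=True)
assert host.to_dict()['new_host'] is True

old_host = Host(connect_to='10.0.0.1', node=True)
assert 'new_host' not in old_host.to_dict()  # None/False props are skipped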
diff --git a/utils/src/ooinstall/openshift_ansible.py b/utils/src/ooinstall/openshift_ansible.py
index fd2cd7fbd..20401f812 100644
--- a/utils/src/ooinstall/openshift_ansible.py
+++ b/utils/src/ooinstall/openshift_ansible.py
@@ -19,13 +19,15 @@ def generate_inventory(hosts):
     global CFG
     masters = [host for host in hosts if host.master]
     nodes = [host for host in hosts if host.node]
+    new_nodes = [host for host in hosts if host.node and host.new_host]
     proxy = determine_proxy_configuration(hosts)
     multiple_masters = len(masters) > 1
+    scaleup = len(new_nodes) > 0
 
     base_inventory_path = CFG.settings['ansible_inventory_path']
     base_inventory = open(base_inventory_path, 'w')
 
-    write_inventory_children(base_inventory, multiple_masters, proxy)
+    write_inventory_children(base_inventory, multiple_masters, proxy, scaleup)
 
     write_inventory_vars(base_inventory, multiple_masters, proxy)
 
@@ -71,6 +73,11 @@ def generate_inventory(hosts):
         base_inventory.write('\n[lb]\n')
         write_host(proxy, base_inventory)
 
+    if scaleup:
+        base_inventory.write('\n[new_nodes]\n')
+        for node in new_nodes:
+            write_host(node, base_inventory)
+
     base_inventory.close()
     return base_inventory_path
 
@@ -84,12 +91,14 @@ def determine_proxy_configuration(hosts):
 
     return None
 
-def write_inventory_children(base_inventory, multiple_masters, proxy):
+def write_inventory_children(base_inventory, multiple_masters, proxy, scaleup):
     global CFG
 
     base_inventory.write('\n[OSEv3:children]\n')
     base_inventory.write('masters\n')
     base_inventory.write('nodes\n')
+    if scaleup:
+        base_inventory.write('new_nodes\n')
     if multiple_masters:
         base_inventory.write('etcd\n')
     if not getattr(proxy, 'preconfigured', True):
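With at least one new_host node present, generate_inventory() now emits a new_nodes child group so the scaleup playbooks can target only the added hosts. A simplified sketch of what write_inventory_children() produces (proxy and multi-master handling elided, for illustration only):

import io

# Simplified copy of write_inventory_children() with the proxy and
# etcd branches dropped.
def write_children(inv, scaleup):
    inv.write('\n[OSEv3:children]\n')
    inv.write('masters\n')
    inv.write('nodes\n')
    if scaleup:
        inv.write('new_nodes\n')

buf = io.StringIO()
write_children(buf, scaleup=True)
print(buf.getvalue())
# [OSEv3:children]
# masters
# nodes
# new_nodes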
diff --git a/utils/test/cli_installer_tests.py b/utils/test/cli_installer_tests.py
index ea380d565..72e8521d0 100644
--- a/utils/test/cli_installer_tests.py
+++ b/utils/test/cli_installer_tests.py
@@ -681,9 +681,9 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
-            ('10.0.0.2', False),
-            ('10.0.0.3', False)],
+            ('10.0.0.1', True, False),
+            ('10.0.0.2', False, False),
+            ('10.0.0.3', False, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y')
@@ -722,10 +722,10 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
-            ('10.0.0.2', False),
+            ('10.0.0.1', True, False),
+            ('10.0.0.2', False, False),
             ],
-                                add_nodes=[('10.0.0.3', False)],
+                                add_nodes=[('10.0.0.3', False, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y')
@@ -773,9 +773,9 @@ class AttendedCliTests(OOCliFixture):
         mock_facts['10.0.0.2']['common']['version'] = "3.0.0"
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
+            ('10.0.0.1', True, False),
             ],
-                                add_nodes=[('10.0.0.2', False)],
+                                add_nodes=[('10.0.0.2', False, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 schedulable_masters_ok=True,
@@ -796,10 +796,10 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
-            ('10.0.0.2', True),
-            ('10.0.0.3', True),
-            ('10.0.0.4', False)],
+            ('10.0.0.1', True, False),
+            ('10.0.0.2', True, False),
+            ('10.0.0.3', True, False),
+            ('10.0.0.4', False, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y',
@@ -837,9 +837,9 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
-            ('10.0.0.2', True),
-            ('10.0.0.3', True)],
+            ('10.0.0.1', True, False),
+            ('10.0.0.2', True, False),
+            ('10.0.0.3', True, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y',
@@ -872,10 +872,10 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True),
-            ('10.0.0.2', True),
-            ('10.0.0.3', False),
-            ('10.0.0.4', True)],
+            ('10.0.0.1', True, False),
+            ('10.0.0.2', True, False),
+            ('10.0.0.3', False, False),
+            ('10.0.0.4', True, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y',
@@ -893,7 +893,7 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True)],
+            ('10.0.0.1', True, False)],
                                 ssh_user='root',
                                 variant_num=1,
                                 confirm_facts='y')
@@ -921,7 +921,7 @@ class AttendedCliTests(OOCliFixture):
         run_playbook_mock.return_value = 0
 
         cli_input = build_input(hosts=[
-            ('10.0.0.1', True)],
+            ('10.0.0.1', True, False)],
                                 ssh_user='root',
                                 variant_num=2,
                                 confirm_facts='y')
diff --git a/utils/test/fixture.py b/utils/test/fixture.py
index 90bd9e1ef..be759578a 100644
--- a/utils/test/fixture.py
+++ b/utils/test/fixture.py
@@ -138,7 +138,7 @@ class OOCliFixture(OOInstallFixture):
 
         self.assertEquals(exp_hosts_to_run_on_len, len(hosts_to_run_on))
 
-#pylint: disable=too-many-arguments,too-many-branches
+#pylint: disable=too-many-arguments,too-many-branches,too-many-statements
 def build_input(ssh_user=None, hosts=None, variant_num=None,
                 add_nodes=None, confirm_facts=None, schedulable_masters_ok=None,
                 master_lb=None):
@@ -163,13 +163,19 @@ def build_input(ssh_user=None, hosts=None, variant_num=None,
     num_masters = 0
     if hosts:
         i = 0
-        for (host, is_master) in hosts:
+        for (host, is_master, is_containerized) in hosts:
             inputs.append(host)
             if is_master:
                 inputs.append('y')
                 num_masters += 1
             else:
                 inputs.append('n')
+
+            if is_containerized:
+                inputs.append('container')
+            else:
+                inputs.append('rpm')
+
             #inputs.append('rpm')
             # We should not be prompted to add more hosts if we're currently at
             # 2 masters, this is an invalid HA configuration, so this question
@@ -196,8 +202,12 @@ def build_input(ssh_user=None, hosts=None, variant_num=None,
             inputs.append('y')
             inputs.append('1')  # Add more nodes
             i = 0
-            for (host, is_master) in add_nodes:
+            for (host, is_master, is_containerized) in add_nodes:
                 inputs.append(host)
+                if is_containerized:
+                    inputs.append('container')
+                else:
+                    inputs.append('rpm')
                 #inputs.append('rpm')
                 if i < len(add_nodes) - 1:
                     inputs.append('y')  # Add more hosts
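Each host tuple passed to build_input() now carries a third is_containerized element, which answers the new rpm-or-container prompt. Typical usage under the new convention (addresses invented):

# (address, is_master, is_containerized) triples; the third element
# feeds the rpm-vs-container prompt added above.
cli_input = build_input(hosts=[
    ('10.0.0.1', True, False),    # master, rpm install
    ('10.0.0.2', False, True)],   # node, containerized
                        ssh_user='root',
                        variant_num=1,
                        confirm_facts='y')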