summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--playbooks/common/openshift-etcd/migrate.yml65
-rw-r--r--playbooks/common/openshift-etcd/scaleup.yml13
-rw-r--r--roles/etcd_common/defaults/main.yml10
-rw-r--r--roles/etcd_migrate/tasks/add_ttls.yml33
-rw-r--r--roles/etcd_migrate/tasks/check.yml3
-rw-r--r--roles/etcd_migrate/tasks/clean_data.yml5
-rw-r--r--roles/etcd_migrate/tasks/main.yml4
-rw-r--r--roles/etcd_migrate/tasks/migrate.yml49
8 files changed, 134 insertions, 48 deletions
diff --git a/playbooks/common/openshift-etcd/migrate.yml b/playbooks/common/openshift-etcd/migrate.yml
index 3e7a48669..311ff84b6 100644
--- a/playbooks/common/openshift-etcd/migrate.yml
+++ b/playbooks/common/openshift-etcd/migrate.yml
@@ -17,18 +17,14 @@
tags:
- always
+# TODO: This will be different for release-3.6 branch
- name: Prepare masters for etcd data migration
hosts: oo_masters_to_config
tasks:
- set_fact:
master_services:
- - "{{ openshift.common.service_type + '-master' }}"
- - set_fact:
- master_services:
- "{{ openshift.common.service_type + '-master-controllers' }}"
- "{{ openshift.common.service_type + '-master-api' }}"
- when:
- - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool
- debug:
msg: "master service name: {{ master_services }}"
- name: Stop masters
@@ -67,16 +63,59 @@
when:
- etcd_backup_failed | length > 0
-- name: Migrate etcd data from v2 to v3
+- name: Stop etcd
hosts: oo_etcd_to_migrate
gather_facts: no
tags:
- always
+ pre_tasks:
+ - set_fact:
+ l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}"
+ - name: Disable etcd members
+ service:
+ name: "{{ l_etcd_service }}"
+ state: stopped
+
+- name: Migrate data on first etcd
+ hosts: oo_etcd_to_migrate[0]
+ gather_facts: no
+ tags:
+ - always
roles:
- role: etcd_migrate
r_etcd_migrate_action: migrate
r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
etcd_peer: "{{ ansible_default_ipv4.address }}"
+ etcd_url_scheme: "https"
+ etcd_peer_url_scheme: "https"
+
+- name: Clean data stores on remaining etcd hosts
+ hosts: oo_etcd_to_migrate[1:]
+ gather_facts: no
+ tags:
+ - always
+ roles:
+ - role: etcd_migrate
+ r_etcd_migrate_action: clean_data
+ r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
+ etcd_peer: "{{ ansible_default_ipv4.address }}"
+ etcd_url_scheme: "https"
+ etcd_peer_url_scheme: "https"
+ post_tasks:
+ - name: Add etcd hosts
+ delegate_to: localhost
+ add_host:
+ name: "{{ item }}"
+ groups: oo_new_etcd_to_config
+ ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+ ansible_become: "{{ g_sudo | default(omit) }}"
+ with_items: "{{ groups.oo_etcd_to_migrate[1:] | default([]) }}"
+ changed_when: no
+ - name: Set success
+ set_fact:
+ r_etcd_migrate_success: true
+
+- include: ./scaleup.yml
- name: Gate on etcd migration
hosts: oo_masters_to_config
@@ -89,6 +128,16 @@
- set_fact:
etcd_migration_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_migration_completed) }}"
+- name: Add TTLs on the first master
+  hosts: oo_first_master[0]
+  roles:
+  - role: etcd_migrate
+    when: etcd_migration_failed | length == 0  # run only when no etcd host is listed as failed
+    r_etcd_migrate_action: add_ttls
+    etcd_url_scheme: https
+    etcd_peer_url_scheme: https
+    etcd_peer: "{{ hostvars[groups['oo_etcd_to_migrate'][0]]['ansible_default_ipv4']['address'] }}"
+
- name: Configure masters if etcd data migration is succesfull
hosts: oo_masters_to_config
roles:
@@ -100,10 +149,6 @@
msg: "Skipping master re-configuration since migration failed."
when:
- etcd_migration_failed | length > 0
-
-- name: Start masters after etcd data migration
- hosts: oo_masters_to_config
- tasks:
- name: Start master services
service:
name: "{{ item }}"
diff --git a/playbooks/common/openshift-etcd/scaleup.yml b/playbooks/common/openshift-etcd/scaleup.yml
index 192305bc8..52b90daca 100644
--- a/playbooks/common/openshift-etcd/scaleup.yml
+++ b/playbooks/common/openshift-etcd/scaleup.yml
@@ -24,6 +24,9 @@
member add {{ etcd_hostname }} {{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_port }}
delegate_to: "{{ etcd_ca_host }}"
register: etcd_add_check
+ retries: 3
+ delay: 10
+ until: etcd_add_check.rc == 0
roles:
- role: openshift_etcd
when: etcd_add_check.rc == 0
@@ -36,3 +39,13 @@
r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
- role: nickhammond.logrotate
when: etcd_add_check.rc == 0
+  post_tasks:
+  # Poll cluster-health until etcdctl exits 0; retries/delay are only honoured together with `until`.
+  - name: Verify cluster is stable
+    command: >
+      /usr/bin/etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }}
+      -C {{ etcd_peer_url_scheme }}://{{ hostvars[etcd_ca_host].etcd_hostname }}:{{ etcd_client_port }} cluster-health
+    register: l_etcd_cluster_health
+    until: l_etcd_cluster_health.rc == 0
+    retries: 1
+    delay: 30
diff --git a/roles/etcd_common/defaults/main.yml b/roles/etcd_common/defaults/main.yml
index b1bfa4592..89993f7ea 100644
--- a/roles/etcd_common/defaults/main.yml
+++ b/roles/etcd_common/defaults/main.yml
@@ -63,3 +63,13 @@ etcd_client_port: 2379
etcd_peer_port: 2380
etcd_url_scheme: http
etcd_peer_url_scheme: http
+# Initial-cluster token and state.
+etcd_initial_cluster_token: 'etcd-cluster-1'
+etcd_initial_cluster_state: 'new'
+# Peer and client URLs, each assembled from a URL scheme, etcd_ip and a port.
+etcd_listen_peer_urls: '{{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_port }}'
+etcd_initial_advertise_peer_urls: '{{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_port }}'
+etcd_listen_client_urls: '{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}'
+etcd_advertise_client_urls: '{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}'
+# systemd drop-in directory path, derived from the etcd_service name.
+etcd_systemd_dir: '/etc/systemd/system/{{ etcd_service }}.service.d'
diff --git a/roles/etcd_migrate/tasks/add_ttls.yml b/roles/etcd_migrate/tasks/add_ttls.yml
new file mode 100644
index 000000000..c10465af9
--- /dev/null
+++ b/roles/etcd_migrate/tasks/add_ttls.yml
@@ -0,0 +1,33 @@
+---
+# To be executed on first master: re-creates key TTLs as leases via `oadm migrate etcd-ttl`.
+- slurp:
+    src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+  register: g_master_config_output
+# Read token and lease lifetimes from master-config.yaml, using the defaults below when a key is unset.
+- set_fact:
+    accessTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.accessTokenMaxAgeSeconds | default(86400) }}"
+    authorizeTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.authorizeTokenMaxAgeSeconds | default(500) }}"
+    controllerLeaseTTL: "{{ (g_master_config_output.content|b64decode|from_yaml).controllerLeaseTTL | default(30) }}"
+- name: Re-introduce leases (as a replacement for key TTLs)
+  command: >
+    oadm migrate etcd-ttl \
+    --cert {{ r_etcd_common_master_peer_cert_file }} \
+    --key {{ r_etcd_common_master_peer_key_file }} \
+    --cacert {{ r_etcd_common_master_peer_ca_file }} \
+    --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \
+    --ttl-keys-prefix {{ item['keys'] }} \
+    --lease-duration {{ item.ttl }}
+  environment:
+    ETCDCTL_API: "3"
+    PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}"
+  with_items:
+  - keys: "/kubernetes.io/events"
+    ttl: "1h"
+  - keys: "/kubernetes.io/masterleases"
+    ttl: "10s"
+  - keys: "/openshift.io/oauth/accesstokens"
+    ttl: "{{ accessTokenMaxAgeSeconds }}s"
+  - keys: "/openshift.io/oauth/authorizetokens"
+    ttl: "{{ authorizeTokenMaxAgeSeconds }}s"
+  - keys: "/openshift.io/leases/controllers"
+    ttl: "{{ controllerLeaseTTL }}s"
diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml
index b66696b55..0804d9e1c 100644
--- a/roles/etcd_migrate/tasks/check.yml
+++ b/roles/etcd_migrate/tasks/check.yml
@@ -1,7 +1,4 @@
---
-- fail:
- msg: "Currently etcd v3 migration is unsupported while we test it more thoroughly"
- when: not openshift_enable_unsupported_configurations | default(false) | bool
# Check the cluster is healthy
- include: check_cluster_health.yml
diff --git a/roles/etcd_migrate/tasks/clean_data.yml b/roles/etcd_migrate/tasks/clean_data.yml
new file mode 100644
index 000000000..95a0e7c0a
--- /dev/null
+++ b/roles/etcd_migrate/tasks/clean_data.yml
@@ -0,0 +1,5 @@
+---
+- name: Remove member data
+  file:
+    path: "{{ etcd_data_dir }}/member"  # same data dir that migrate.yml hands to `etcdctl migrate`
+    state: absent
diff --git a/roles/etcd_migrate/tasks/main.yml b/roles/etcd_migrate/tasks/main.yml
index 409b0b613..e82f6a6b4 100644
--- a/roles/etcd_migrate/tasks/main.yml
+++ b/roles/etcd_migrate/tasks/main.yml
@@ -1,8 +1,8 @@
---
- name: Fail if invalid r_etcd_migrate_action provided
fail:
- msg: "etcd_migrate role can only be called with 'check' or 'migrate' or 'configure'"
- when: r_etcd_migrate_action not in ['check', 'migrate', 'configure']
+ msg: "etcd_migrate role can only be called with 'check', 'migrate', 'configure', 'add_ttls', or 'clean_data'"
+ when: r_etcd_migrate_action not in ['check', 'migrate', 'configure', 'add_ttls', 'clean_data']
- name: Include main action task file
include: "{{ r_etcd_migrate_action }}.yml"
diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml
index b2cf6d20a..173de77f4 100644
--- a/roles/etcd_migrate/tasks/migrate.yml
+++ b/roles/etcd_migrate/tasks/migrate.yml
@@ -3,62 +3,45 @@
- set_fact:
l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}"
-- name: Disable etcd members
- service:
- name: "{{ l_etcd_service }}"
- state: stopped
-
-# Should we skip all TTL keys? https://bugzilla.redhat.com/show_bug.cgi?id=1389773
- name: Migrate etcd data
command: >
etcdctl migrate --data-dir={{ etcd_data_dir }}
environment:
ETCDCTL_API: 3
register: l_etcdctl_migrate
-
# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup
- name: Check the etcd v2 data are correctly migrated
fail:
msg: "Failed to migrate a member"
when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout"
-
- name: Migration message
debug:
msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}"
-
-- name: Enable etcd member
- service:
+- name: Set ETCD_FORCE_NEW_CLUSTER=true on first etcd host
+ lineinfile:
+ line: "ETCD_FORCE_NEW_CLUSTER=true"
+ dest: /etc/etcd/etcd.conf
+- name: Start etcd
+ systemd:
name: "{{ l_etcd_service }}"
state: started
+- name: Unset ETCD_FORCE_NEW_CLUSTER=true on first etcd host
+ lineinfile:
+ line: "ETCD_FORCE_NEW_CLUSTER=true"
+ dest: /etc/etcd/etcd.conf
+ state: absent
+- name: Restart first etcd host
+ systemd:
+ name: "{{ l_etcd_service }}"
+ state: restarted
-- name: Wait for cluster to become healthy after migration
+- name: Wait for cluster to become healthy after bringing up first member
command: >
etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health
register: l_etcd_migrate_health
until: l_etcd_migrate_health.rc == 0
retries: 3
delay: 30
- run_once: true
-
-# NOTE: /usr/local/bin may be removed from the PATH by ansible hence why
-# it's added to the environment in this task.
-- name: Re-introduce leases (as a replacement for key TTLs)
- command: >
- oadm migrate etcd-ttl \
- --cert {{ r_etcd_common_master_peer_cert_file }} \
- --key {{ r_etcd_common_master_peer_key_file }} \
- --cacert {{ r_etcd_common_master_peer_ca_file }} \
- --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \
- --ttl-keys-prefix {{ item }} \
- --lease-duration 1h
- environment:
- ETCDCTL_API: 3
- PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}"
- with_items:
- - "/kubernetes.io/events"
- - "/kubernetes.io/masterleases"
- delegate_to: "{{ groups.oo_first_master[0] }}"
- run_once: true
- set_fact:
r_etcd_migrate_success: true