19 files changed, 644 insertions, 55 deletions
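The common thread in this changeset is that the SDN sizing variables become mandatory inventory input for installs, upgrades, and scale-ups (enforced in openshift_sanitize_inventory further down) and must agree with the running cluster's /etc/origin/master/master-config.yaml. As a rough sketch only — the values are the "sane examples" carried in the hosts.*.example files in this diff, and an existing cluster must instead use whatever its master config already contains — an inventory would carry something like:

    # clusterNetworkCIDR in master-config.yaml
    osm_cluster_network_cidr=10.128.0.0/14
    # serviceNetworkCIDR in master-config.yaml
    openshift_portal_net=172.30.0.0/16
    # hostSubnetLength in master-config.yaml
    osm_host_subnet_length=9

A hostSubnetLength of 9 reserves 9 host bits per node, so each node receives a /23 (32 - 9 = 23) carved out of the /14 cluster network, exactly as the example-inventory comments describe.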
diff --git a/.papr.inventory b/.papr.inventory
index 878d434e2..aa4324c21 100644
--- a/.papr.inventory
+++ b/.papr.inventory
@@ -11,6 +11,9 @@ openshift_image_tag="{{ lookup('env', 'OPENSHIFT_IMAGE_TAG') }}"
 openshift_master_default_subdomain="{{ lookup('env', 'RHCI_ocp_node1_IP') }}.xip.io"
 openshift_check_min_host_disk_gb=1.5
 openshift_check_min_host_memory_gb=1.9
+osm_cluster_network_cidr=10.128.0.0/14
+openshift_portal_net=172.30.0.0/16
+osm_host_subnet_length=9
 
 [masters]
 ocp-master
diff --git a/images/installer/root/exports/config.json.template b/images/installer/root/exports/config.json.template
index 739c0080f..1a009fa7b 100644
--- a/images/installer/root/exports/config.json.template
+++ b/images/installer/root/exports/config.json.template
@@ -24,7 +24,7 @@
         "PLAYBOOK_FILE=$PLAYBOOK_FILE",
         "ANSIBLE_CONFIG=$ANSIBLE_CONFIG"
     ],
-    "cwd": "/opt/app-root/src/",
+    "cwd": "/usr/share/ansible/openshift-ansible",
     "rlimits": [
         {
             "type": "RLIMIT_NOFILE",
diff --git a/images/installer/root/usr/local/bin/run b/images/installer/root/usr/local/bin/run
index 51ac566e5..70aa0bac3 100755
--- a/images/installer/root/usr/local/bin/run
+++ b/images/installer/root/usr/local/bin/run
@@ -39,7 +39,7 @@ if [[ "$ALLOW_ANSIBLE_CONNECTION_LOCAL" = false ]]; then
 fi
 
 if [[ -v VAULT_PASS ]]; then
-  VAULT_PASS_FILE=.vaultpass
+  VAULT_PASS_FILE="$(mktemp)"
   echo ${VAULT_PASS} > ${VAULT_PASS_FILE}
   VAULT_PASS_ARG="--vault-password-file ${VAULT_PASS_FILE}"
 fi
diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example
index be15944d5..486fe56a0 100644
--- a/inventory/byo/hosts.origin.example
+++ b/inventory/byo/hosts.origin.example
@@ -613,7 +613,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 # WORKAROUND : If you must use an overlapping subnet, you can configure a non conflicting
 # docker0 CIDR range by adding '--bip=192.168.2.1/24' to DOCKER_NETWORK_OPTIONS
 # environment variable located in /etc/sysconfig/docker-network.
-# When upgrading these must be specificed!
+# When upgrading or scaling up the following must match whats in your master config!
+# Inventory: master yaml field
+# osm_cluster_network_cidr: clusterNetworkCIDR
+# openshift_portal_net: serviceNetworkCIDR
+# When installing osm_cluster_network_cidr and openshift_portal_net must be set.
+# Sane examples are provided below.
 #osm_cluster_network_cidr=10.128.0.0/14
 #openshift_portal_net=172.30.0.0/16
 
@@ -635,7 +640,10 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 
 # Configure number of bits to allocate to each host’s subnet e.g. 9
 # would mean a /23 network on the host.
-# When upgrading this must be specificed!
+# When upgrading or scaling up the following must match whats in your master config!
+# Inventory: master yaml field
+# osm_host_subnet_length: hostSubnetLength
+# When installing osm_host_subnet_length must be set. A sane example is provided below.
 #osm_host_subnet_length=9
 
 # Configure master API and console ports.
diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example
index ad84e6aef..92a0927e5 100644
--- a/inventory/byo/hosts.ose.example
+++ b/inventory/byo/hosts.ose.example
@@ -621,7 +621,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 # WORKAROUND : If you must use an overlapping subnet, you can configure a non conflicting
 # docker0 CIDR range by adding '--bip=192.168.2.1/24' to DOCKER_NETWORK_OPTIONS
 # environment variable located in /etc/sysconfig/docker-network.
-# When upgrading these must be specificed!
+# When upgrading or scaling up the following must match whats in your master config!
+# Inventory: master yaml field
+# osm_cluster_network_cidr: clusterNetworkCIDR
+# openshift_portal_net: serviceNetworkCIDR
+# When installing osm_cluster_network_cidr and openshift_portal_net must be set.
+# Sane examples are provided below.
 #osm_cluster_network_cidr=10.128.0.0/14
 #openshift_portal_net=172.30.0.0/16
 
@@ -643,7 +648,10 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 
 # Configure number of bits to allocate to each host’s subnet e.g. 9
 # would mean a /23 network on the host.
-# When upgrading this must be specificed!
+# When upgrading or scaling up the following must match whats in your master config!
+# Inventory: master yaml field
+# osm_host_subnet_length: hostSubnetLength
+# When installing osm_host_subnet_length must be set. A sane example is provided below.
 #osm_host_subnet_length=9
 
 # Configure master API and console ports.
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
index 4c345dbe8..9a959a959 100644
--- a/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
+++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
@@ -9,21 +9,6 @@ deployment types
     when: deployment_type not in ['origin','openshift-enterprise', 'online']
 
-  # osm_cluster_network_cidr, osm_host_subnet_length and openshift_portal_net are
-  # required when upgrading to avoid changes that may occur between releases
-  # Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1451023
-  - assert:
-      that:
-      - "osm_cluster_network_cidr is defined"
-      - "osm_host_subnet_length is defined"
-      - "openshift_portal_net is defined"
-      msg: >
-        osm_cluster_network_cidr, osm_host_subnet_length, and openshift_portal_net are required inventory
-        variables when upgrading. These variables should match what is currently used in the cluster. If
-        you don't remember what these values are you can find them in /etc/origin/master/master-config.yaml
-        on a master with the names clusterNetworkCIDR (osm_cluster_network_cidr),
-        hostSubnetLength (osm_host_subnet_length), and serviceNetworkCIDR (openshift_portal_net).
-
   # Error out in situations where the user has older versions specified in their
   # inventory in any of the openshift_release, openshift_image_tag, and
   # openshift_pkg_version variables.
These must be removed or updated to proceed diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index c77d7bb87..e1b9a4964 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -179,28 +179,36 @@ openshift_master_count: "{{ openshift.master.master_count }}" openshift_master_session_auth_secrets: "{{ hostvars[groups.oo_first_master.0].openshift.master.session_auth_secrets }}" openshift_master_session_encryption_secrets: "{{ hostvars[groups.oo_first_master.0].openshift.master.session_encryption_secrets }}" - openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config'] - | union(groups['oo_masters_to_config']) - | union(groups['oo_etcd_to_config'] | default([]))) - | oo_collect('openshift.common.hostname') | default([]) | join (',') - }}" - openshift_no_proxy_etcd_host_ips: "{{ hostvars | oo_select_keys(groups['oo_etcd_to_config'] | default([])) - | oo_collect('openshift.common.ip') | default([]) | join(',') - }}" - roles: - - role: os_firewall - - role: openshift_master openshift_ca_host: "{{ groups.oo_first_master.0 }}" openshift_master_etcd_hosts: "{{ hostvars | oo_select_keys(groups['oo_etcd_to_config'] | default([])) | oo_collect('openshift.common.hostname') | default(none, true) }}" - openshift_master_hosts: "{{ groups.oo_masters_to_config }}" - r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" + openshift_no_proxy_etcd_host_ips: "{{ hostvars | oo_select_keys(groups['oo_etcd_to_config'] | default([])) + | oo_collect('openshift.common.ip') | default([]) | join(',') + }}" + roles: + - role: os_firewall + - role: openshift_master_facts + - role: openshift_hosted_facts + - role: openshift_master_certificates + - role: openshift_etcd_client_certificates etcd_cert_subdir: "openshift-master-{{ openshift.common.hostname }}" etcd_cert_config_dir: "{{ openshift.common.config_base }}/master" etcd_cert_prefix: "master.etcd-" + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" + etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" + when: groups.oo_etcd_to_config | default([]) | length != 0 + - role: openshift_clock + - role: openshift_cloud_provider + - role: openshift_builddefaults + - role: openshift_buildoverrides + - role: nickhammond.logrotate + - role: contiv + contiv_role: netmaster + when: openshift_use_contiv | default(False) | bool + - role: openshift_master + openshift_master_hosts: "{{ groups.oo_masters_to_config }}" r_openshift_master_clean_install: "{{ hostvars[groups.oo_first_master.0].l_clean_install }}" r_openshift_master_etcd3_storage: "{{ hostvars[groups.oo_first_master.0].l_etcd3_enabled }}" openshift_master_is_scaleup_host: "{{ g_openshift_master_is_scaleup | default(false) }}" @@ -211,7 +219,6 @@ when: openshift_use_nuage | default(false) | bool - role: calico_master when: openshift_use_calico | default(false) | bool - post_tasks: - name: Create group for deployment type group_by: key=oo_masters_deployment_type_{{ openshift.common.deployment_type }} diff --git a/playbooks/gcp/openshift-cluster/provision.yml b/playbooks/gcp/openshift-cluster/provision.yml new file mode 100644 index 000000000..a3d1d46a6 --- /dev/null +++ b/playbooks/gcp/openshift-cluster/provision.yml @@ -0,0 +1,19 @@ +--- +- name: Ensure all cloud resources necessary for the cluster, including instances, have been started + hosts: localhost + connection: local + gather_facts: no + tasks: 
+ + - name: provision a GCP cluster in the specified project + include_role: + name: openshift_gcp + +- name: normalize groups + include: ../../byo/openshift-cluster/initialize_groups.yml + +- name: run the std_include + include: ../../common/openshift-cluster/std_include.yml + +- name: run the config + include: ../../common/openshift-cluster/config.yml diff --git a/roles/openshift_gcp/tasks/main.yaml b/roles/openshift_gcp/tasks/main.yaml new file mode 100644 index 000000000..ad205ba33 --- /dev/null +++ b/roles/openshift_gcp/tasks/main.yaml @@ -0,0 +1,43 @@ +# +# This role relies on gcloud invoked via templated bash in order to +# provide a high performance deployment option. The next logical step +# is to transition to a deployment manager template which is then instantiated. +# TODO: use a formal set of role parameters consistent with openshift_aws +# +--- +- name: Templatize DNS script + template: src=dns.j2.sh dest=/tmp/openshift_gcp_provision_dns.sh mode=u+rx +- name: Templatize provision script + template: src=provision.j2.sh dest=/tmp/openshift_gcp_provision.sh mode=u+rx +- name: Templatize de-provision script + template: src=remove.j2.sh dest=/tmp/openshift_gcp_provision_remove.sh mode=u+rx + when: + - state | default('present') == 'absent' + +- name: Provision GCP DNS domain + command: /tmp/openshift_gcp_provision_dns.sh + args: + chdir: "{{ playbook_dir }}/files" + register: dns_provision + when: + - state | default('present') == 'present' + +- name: Ensure that DNS resolves to the hosted zone + assert: + that: + - "lookup('dig', public_hosted_zone, 'qtype=NS', wantlist=True) | sort | join(',') == dns_provision.stdout" + msg: "The DNS domain {{ public_hosted_zone }} defined in 'public_hosted_zone' must have NS records pointing to the Google nameservers: '{{ dns_provision.stdout }}' instead of '{{ lookup('dig', public_hosted_zone, 'qtype=NS') }}'." + when: + - state | default('present') == 'present' + +- name: Provision GCP resources + command: /tmp/openshift_gcp_provision.sh + args: + chdir: "{{ playbook_dir }}/files" + when: + - state | default('present') == 'present' + +- name: De-provision GCP resources + command: /tmp/openshift_gcp_provision_remove.sh + when: + - state | default('present') == 'absent' diff --git a/roles/openshift_gcp/templates/dns.j2.sh b/roles/openshift_gcp/templates/dns.j2.sh new file mode 100644 index 000000000..eacf84b4d --- /dev/null +++ b/roles/openshift_gcp/templates/dns.j2.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -euo pipefail + +dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}" + +# Check the DNS managed zone in Google Cloud DNS, create it if it doesn't exist +if ! gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" dns managed-zones create "${dns_zone}" --dns-name "{{ public_hosted_zone }}" --description "{{ public_hosted_zone }} domain" >/dev/null +fi + +# Always output the expected nameservers as a comma delimited list +gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" --format='value(nameServers)' | tr ';' ',' diff --git a/roles/openshift_gcp/templates/provision.j2.sh b/roles/openshift_gcp/templates/provision.j2.sh new file mode 100644 index 000000000..e68e9683f --- /dev/null +++ b/roles/openshift_gcp/templates/provision.j2.sh @@ -0,0 +1,318 @@ +#!/bin/bash + +set -euo pipefail + +# Create SSH key for GCE +if [ ! 
-f "{{ gce_ssh_private_key }}" ]; then + ssh-keygen -t rsa -f "{{ gce_ssh_private_key }}" -C gce-provision-cloud-user -N '' + ssh-add "{{ gce_ssh_private_key }}" || true +fi + +# Check if the ~/.ssh/google_compute_engine.pub key is in the project metadata, and if not, add it there +pub_key=$(cut -d ' ' -f 2 < "{{ gce_ssh_private_key }}.pub") +key_tmp_file='/tmp/ocp-gce-keys' +if ! gcloud --project "{{ gce_project_id }}" compute project-info describe | grep -q "$pub_key"; then + if gcloud --project "{{ gce_project_id }}" compute project-info describe | grep -q ssh-rsa; then + gcloud --project "{{ gce_project_id }}" compute project-info describe | grep ssh-rsa | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e 's/value: //' > "$key_tmp_file" + fi + echo -n 'cloud-user:' >> "$key_tmp_file" + cat "{{ gce_ssh_private_key }}.pub" >> "$key_tmp_file" + gcloud --project "{{ gce_project_id }}" compute project-info add-metadata --metadata-from-file "sshKeys=${key_tmp_file}" + rm -f "$key_tmp_file" +fi + +metadata="" +if [[ -n "{{ provision_gce_startup_script_file }}" ]]; then + if [[ ! -f "{{ provision_gce_startup_script_file }}" ]]; then + echo "Startup script file missing at {{ provision_gce_startup_script_file }} from=$(pwd)" + exit 1 + fi + metadata+="--metadata-from-file=startup-script={{ provision_gce_startup_script_file }}" +fi +if [[ -n "{{ provision_gce_user_data_file }}" ]]; then + if [[ ! -f "{{ provision_gce_user_data_file }}" ]]; then + echo "User data file missing at {{ provision_gce_user_data_file }}" + exit 1 + fi + if [[ -n "${metadata}" ]]; then + metadata+="," + else + metadata="--metadata-from-file=" + fi + metadata+="user-data={{ provision_gce_user_data_file }}" +fi + +# Select image or image family +image="{{ provision_gce_registered_image }}" +if ! gcloud --project "{{ gce_project_id }}" compute images describe "${image}" &>/dev/null; then + if ! gcloud --project "{{ gce_project_id }}" compute images describe-from-family "${image}" &>/dev/null; then + echo "No compute image or image-family found, create an image named '{{ provision_gce_registered_image }}' to continue'" + exit 1 + fi + image="family/${image}" +fi + +### PROVISION THE INFRASTRUCTURE ### + +dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}" + +# Check the DNS managed zone in Google Cloud DNS, create it if it doesn't exist and exit after printing NS servers +if ! gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then + echo "DNS zone '${dns_zone}' doesn't exist. Must be configured prior to running this script" + exit 1 +fi + +# Create network +if ! 
gcloud --project "{{ gce_project_id }}" compute networks describe "{{ gce_network_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute networks create "{{ gce_network_name }}" --mode "auto" +else + echo "Network '{{ gce_network_name }}' already exists" +fi + +# Firewall rules in a form: +# ['name']='parameters for "gcloud compute firewall-rules create"' +# For all possible parameters see: gcloud compute firewall-rules create --help +range="" +if [[ -n "{{ openshift_node_port_range }}" ]]; then + range=",tcp:{{ openshift_node_port_range }},udp:{{ openshift_node_port_range }}" +fi +declare -A FW_RULES=( + ['icmp']='--allow icmp' + ['ssh-external']='--allow tcp:22' + ['ssh-internal']='--allow tcp:22 --source-tags bastion' + ['master-internal']="--allow tcp:2224,tcp:2379,tcp:2380,tcp:4001,udp:4789,udp:5404,udp:5405,tcp:8053,udp:8053,tcp:8444,tcp:10250,tcp:10255,udp:10255,tcp:24224,udp:24224 --source-tags ocp --target-tags ocp-master" + ['master-external']="--allow tcp:80,tcp:443,tcp:1936,tcp:8080,tcp:8443${range} --target-tags ocp-master" + ['node-internal']="--allow udp:4789,tcp:10250,tcp:10255,udp:10255 --source-tags ocp --target-tags ocp-node,ocp-infra-node" + ['infra-node-internal']="--allow tcp:5000 --source-tags ocp --target-tags ocp-infra-node" + ['infra-node-external']="--allow tcp:80,tcp:443,tcp:1936${range} --target-tags ocp-infra-node" +) +for rule in "${!FW_RULES[@]}"; do + ( if ! gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute firewall-rules create "{{ provision_prefix }}$rule" --network "{{ gce_network_name }}" ${FW_RULES[$rule]} + else + echo "Firewall rule '{{ provision_prefix }}${rule}' already exists" + fi ) & +done + + +# Master IP +( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-ssl-lb-ip" --global +else + echo "IP '{{ provision_prefix }}master-ssl-lb-ip' already exists" +fi ) & + +# Internal master IP +( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" +else + echo "IP '{{ provision_prefix }}master-network-lb-ip' already exists" +fi ) & + +# Router IP +( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" +else + echo "IP '{{ provision_prefix }}router-network-lb-ip' already exists" +fi ) & + + +{% for node_group in provision_gce_node_groups %} +# configure {{ node_group.name }} +( + if ! 
gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-{{ node_group.name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-{{ node_group.name }}" \ + --machine-type "{{ node_group.machine_type }}" --network "{{ gce_network_name }}" \ + --tags "{{ provision_prefix }}ocp,ocp,{{ node_group.tags }}" \ + --boot-disk-size "{{ node_group.boot_disk_size }}" --boot-disk-type "pd-ssd" \ + --scopes "logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-ro,compute-rw" \ + --image "${image}" ${metadata} + else + echo "Instance template '{{ provision_prefix }}instance-template-{{ node_group.name }}' already exists" + fi + + # Create instance group + if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-{{ node_group.suffix }}" \ + --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-{{ node_group.name }}" --size "{{ node_group.scale }}" + else + echo "Instance group '{{ provision_prefix }}ig-{{ node_group.suffix }}' already exists" + fi +) & +{% endfor %} + +for i in `jobs -p`; do wait $i; done + + +# Configure the master external LB rules +( +# Master health check +if ! gcloud --project "{{ gce_project_id }}" compute health-checks describe "{{ provision_prefix }}master-ssl-lb-health-check" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute health-checks create https "{{ provision_prefix }}master-ssl-lb-health-check" --port "{{ internal_console_port }}" --request-path "/healthz" +else + echo "Health check '{{ provision_prefix }}master-ssl-lb-health-check' already exists" +fi + +gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-named-ports "{{ provision_prefix }}ig-m" \ + --zone "{{ gce_zone_name }}" --named-ports "{{ provision_prefix }}port-name-master:{{ internal_console_port }}" + +# Master backend service +if ! gcloud --project "{{ gce_project_id }}" compute backend-services describe "{{ provision_prefix }}master-ssl-lb-backend" --global &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute backend-services create "{{ provision_prefix }}master-ssl-lb-backend" --health-checks "{{ provision_prefix }}master-ssl-lb-health-check" --port-name "{{ provision_prefix }}port-name-master" --protocol "TCP" --global --timeout="{{ provision_gce_master_https_timeout | default('2m') }}" + gcloud --project "{{ gce_project_id }}" compute backend-services add-backend "{{ provision_prefix }}master-ssl-lb-backend" --instance-group "{{ provision_prefix }}ig-m" --global --instance-group-zone "{{ gce_zone_name }}" +else + echo "Backend service '{{ provision_prefix }}master-ssl-lb-backend' already exists" +fi + +# Master tcp proxy target +if ! gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies describe "{{ provision_prefix }}master-ssl-lb-target" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies create "{{ provision_prefix }}master-ssl-lb-target" --backend-service "{{ provision_prefix }}master-ssl-lb-backend" +else + echo "Proxy target '{{ provision_prefix }}master-ssl-lb-target' already exists" +fi + +# Master forwarding rule +if ! 
gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}master-ssl-lb-rule" --global &>/dev/null; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global --format='value(address)') + gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-ssl-lb-rule" --address "$IP" --global --ports "{{ console_port }}" --target-tcp-proxy "{{ provision_prefix }}master-ssl-lb-target" +else + echo "Forwarding rule '{{ provision_prefix }}master-ssl-lb-rule' already exists" +fi +) & + + +# Configure the master internal LB rules +( +# Internal master health check +if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}master-network-lb-health-check" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute http-health-checks create "{{ provision_prefix }}master-network-lb-health-check" --port "8080" --request-path "/healthz" +else + echo "Health check '{{ provision_prefix }}master-network-lb-health-check' already exists" +fi + +# Internal master target pool +if ! gcloud --project "{{ gce_project_id }}" compute target-pools describe "{{ provision_prefix }}master-network-lb-pool" --region "{{ gce_region_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute target-pools create "{{ provision_prefix }}master-network-lb-pool" --http-health-check "{{ provision_prefix }}master-network-lb-health-check" --region "{{ gce_region_name }}" +else + echo "Target pool '{{ provision_prefix }}master-network-lb-pool' already exists" +fi + +# Internal master forwarding rule +if ! gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}master-network-lb-rule" --region "{{ gce_region_name }}" &>/dev/null; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)') + gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-network-lb-rule" --address "$IP" --region "{{ gce_region_name }}" --target-pool "{{ provision_prefix }}master-network-lb-pool" +else + echo "Forwarding rule '{{ provision_prefix }}master-network-lb-rule' already exists" +fi +) & + + +# Configure the infra node rules +( +# Router health check +if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}router-network-lb-health-check" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute http-health-checks create "{{ provision_prefix }}router-network-lb-health-check" --port "1936" --request-path "/healthz" +else + echo "Health check '{{ provision_prefix }}router-network-lb-health-check' already exists" +fi + +# Router target pool +if ! gcloud --project "{{ gce_project_id }}" compute target-pools describe "{{ provision_prefix }}router-network-lb-pool" --region "{{ gce_region_name }}" &>/dev/null; then + gcloud --project "{{ gce_project_id }}" compute target-pools create "{{ provision_prefix }}router-network-lb-pool" --http-health-check "{{ provision_prefix }}router-network-lb-health-check" --region "{{ gce_region_name }}" +else + echo "Target pool '{{ provision_prefix }}router-network-lb-pool' already exists" +fi + +# Router forwarding rule +if ! 
gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}router-network-lb-rule" --region "{{ gce_region_name }}" &>/dev/null; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)') + gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}router-network-lb-rule" --address "$IP" --region "{{ gce_region_name }}" --target-pool "{{ provision_prefix }}router-network-lb-pool" +else + echo "Forwarding rule '{{ provision_prefix }}router-network-lb-rule' already exists" +fi +) & + +for i in `jobs -p`; do wait $i; done + +# set the target pools +( +if [[ "ig-m" == "{{ provision_gce_router_network_instance_group }}" ]]; then + gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool,{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}" +else + gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool" --zone "{{ gce_zone_name }}" + gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}{{ provision_gce_router_network_instance_group }}" --target-pools "{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}" +fi +) & + +# configure DNS +( +# Retry DNS changes until they succeed since this may be a shared resource +while true; do + dns="${TMPDIR:-/tmp}/dns.yaml" + rm -f $dns + + # DNS record for master lb + if ! gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ openshift_master_cluster_public_hostname }}" 2>/dev/null | grep -q "{{ openshift_master_cluster_public_hostname }}"; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global --format='value(address)') + if [[ ! -f $dns ]]; then + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}" + fi + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ openshift_master_cluster_public_hostname }}." --type A "$IP" + else + echo "DNS record for '{{ openshift_master_cluster_public_hostname }}' already exists" + fi + + # DNS record for internal master lb + if ! gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ openshift_master_cluster_hostname }}" 2>/dev/null | grep -q "{{ openshift_master_cluster_hostname }}"; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)') + if [[ ! -f $dns ]]; then + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}" + fi + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ openshift_master_cluster_hostname }}." --type A "$IP" + else + echo "DNS record for '{{ openshift_master_cluster_hostname }}' already exists" + fi + + # DNS record for router lb + if ! 
gcloud --project "{{ gce_project_id }}" dns record-sets list -z "${dns_zone}" --name "{{ wildcard_zone }}" 2>/dev/null | grep -q "{{ wildcard_zone }}"; then + IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}router-network-lb-ip" --region "{{ gce_region_name }}" --format='value(address)') + if [[ ! -f $dns ]]; then + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}" + fi + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "{{ wildcard_zone }}." --type A "$IP" + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns add -z "${dns_zone}" --ttl 3600 --name "*.{{ wildcard_zone }}." --type CNAME "{{ wildcard_zone }}." + else + echo "DNS record for '{{ wildcard_zone }}' already exists" + fi + + # Commit all DNS changes, retrying if preconditions are not met + if [[ -f $dns ]]; then + if ! out="$( gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns execute -z "${dns_zone}" 2>&1 )"; then + rc=$? + if [[ "${out}" == *"HTTPError 412: Precondition not met"* ]]; then + continue + fi + exit $rc + fi + fi + break +done +) & + +# Create bucket for registry +( +if ! gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then + gsutil mb -p "{{ gce_project_id }}" -l "{{ gce_region_name }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" +else + echo "Bucket '{{ openshift_hosted_registry_storage_gcs_bucket }}' already exists" +fi +) & + +# wait until all node groups are stable +{% for node_group in provision_gce_node_groups %} +# wait for stable {{ node_group.name }} +( gcloud --project "{{ gce_project_id }}" compute instance-groups managed wait-until-stable "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --timeout=300) & +{% endfor %} + + +for i in `jobs -p`; do wait $i; done diff --git a/roles/openshift_gcp/templates/remove.j2.sh b/roles/openshift_gcp/templates/remove.j2.sh new file mode 100644 index 000000000..41ceab2b5 --- /dev/null +++ b/roles/openshift_gcp/templates/remove.j2.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +set -euo pipefail + +function teardown_cmd() { + a=( $@ ) + local name=$1 + a=( "${a[@]:1}" ) + local flag=0 + local found= + for i in ${a[@]}; do + if [[ "$i" == "--"* ]]; then + found=true + break + fi + flag=$((flag+1)) + done + if [[ -z "${found}" ]]; then + flag=$((flag+1)) + fi + if gcloud --project "{{ gce_project_id }}" ${a[@]::$flag} describe "${name}" ${a[@]:$flag} &>/dev/null; then + gcloud --project "{{ gce_project_id }}" ${a[@]::$flag} delete -q "${name}" ${a[@]:$flag} + fi +} + +function teardown() { + for i in `seq 1 20`; do + if teardown_cmd $@; then + break + fi + sleep 0.5 + done +} + +# Preemptively spin down the instances +{% for node_group in provision_gce_node_groups %} +# scale down {{ node_group.name }} +( + # performs a delete and scale down as one operation to ensure maximum parallelism + if ! instances=$( gcloud --project "{{ gce_project_id }}" compute instance-groups managed list-instances "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --format='value[terminator=","](instance)' ); then + exit 0 + fi + instances="${instances%?}" + if [[ -z "${instances}" ]]; then + echo "warning: No instances in {{ node_group.name }}" 1>&2 + exit 0 + fi + if ! 
gcloud --project "{{ gce_project_id }}" compute instance-groups managed delete-instances "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --instances "${instances}"; then + echo "warning: Unable to scale down the node group {{ node_group.name }}" 1>&2 + exit 0 + fi +) & +{% endfor %} + +# Bucket for registry +( +if gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then + gsutil -m rm -r "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" +fi +) & + +# DNS +( +dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}" +if gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then + # Retry DNS changes until they succeed since this may be a shared resource + while true; do + dns="${TMPDIR:-/tmp}/dns.yaml" + rm -f "${dns}" + + # export all dns records that match into a zone format, and turn each line into a set of args for + # record-sets transaction. + gcloud dns record-sets export --project "{{ gce_project_id }}" -z "${dns_zone}" --zone-file-format "${dns}" + if grep -F -e '{{ openshift_master_cluster_hostname }}' -e '{{ openshift_master_cluster_public_hostname }}' -e '{{ wildcard_zone }}' "${dns}" | \ + awk '{ print "--name", $1, "--ttl", $2, "--type", $4, $5; }' > "${dns}.input" + then + rm -f "${dns}" + gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns start -z "${dns_zone}" + cat "${dns}.input" | xargs -L1 gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file="${dns}" remove -z "${dns_zone}" + + # Commit all DNS changes, retrying if preconditions are not met + if ! out="$( gcloud --project "{{ gce_project_id }}" dns record-sets transaction --transaction-file=$dns execute -z "${dns_zone}" 2>&1 )"; then + rc=$? 
+ if [[ "${out}" == *"HTTPError 412: Precondition not met"* ]]; then + continue + fi + exit $rc + fi + fi + rm "${dns}.input" + break + done +fi +) & + +( +# Router network rules +teardown "{{ provision_prefix }}router-network-lb-rule" compute forwarding-rules --region "{{ gce_region_name }}" +teardown "{{ provision_prefix }}router-network-lb-pool" compute target-pools --region "{{ gce_region_name }}" +teardown "{{ provision_prefix }}router-network-lb-health-check" compute http-health-checks +teardown "{{ provision_prefix }}router-network-lb-ip" compute addresses --region "{{ gce_region_name }}" + +# Internal master network rules +teardown "{{ provision_prefix }}master-network-lb-rule" compute forwarding-rules --region "{{ gce_region_name }}" +teardown "{{ provision_prefix }}master-network-lb-pool" compute target-pools --region "{{ gce_region_name }}" +teardown "{{ provision_prefix }}master-network-lb-health-check" compute http-health-checks +teardown "{{ provision_prefix }}master-network-lb-ip" compute addresses --region "{{ gce_region_name }}" +) & + +( +# Master SSL network rules +teardown "{{ provision_prefix }}master-ssl-lb-rule" compute forwarding-rules --global +teardown "{{ provision_prefix }}master-ssl-lb-target" compute target-tcp-proxies +teardown "{{ provision_prefix }}master-ssl-lb-ip" compute addresses --global +teardown "{{ provision_prefix }}master-ssl-lb-backend" compute backend-services --global +teardown "{{ provision_prefix }}master-ssl-lb-health-check" compute health-checks +) & + +#Firewall rules +#['name']='parameters for "gcloud compute firewall-rules create"' +#For all possible parameters see: gcloud compute firewall-rules create --help +declare -A FW_RULES=( + ['icmp']="" + ['ssh-external']="" + ['ssh-internal']="" + ['master-internal']="" + ['master-external']="" + ['node-internal']="" + ['infra-node-internal']="" + ['infra-node-external']="" +) +for rule in "${!FW_RULES[@]}"; do + ( if gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then + # retry a few times because this call can be flaky + for i in `seq 1 3`; do + if gcloud -q --project "{{ gce_project_id }}" compute firewall-rules delete "{{ provision_prefix }}$rule"; then + break + fi + done + fi ) & +done + +for i in `jobs -p`; do wait $i; done + +{% for node_group in provision_gce_node_groups %} +# teardown {{ node_group.name }} - any load balancers referencing these groups must be removed +( + teardown "{{ provision_prefix }}ig-{{ node_group.suffix }}" compute instance-groups managed --zone "{{ gce_zone_name }}" + teardown "{{ provision_prefix }}instance-template-{{ node_group.name }}" compute instance-templates +) & +{% endfor %} + +for i in `jobs -p`; do wait $i; done + +# Network +teardown "{{ gce_network_name }}" compute networks diff --git a/roles/openshift_gcp_image_prep/files/partition.conf b/roles/openshift_gcp_image_prep/files/partition.conf new file mode 100644 index 000000000..b87e5e0b6 --- /dev/null +++ b/roles/openshift_gcp_image_prep/files/partition.conf @@ -0,0 +1,3 @@ +[Service] +ExecStartPost=-/usr/bin/growpart /dev/sda 1 +ExecStartPost=-/sbin/xfs_growfs / diff --git a/roles/openshift_gcp_image_prep/tasks/main.yaml b/roles/openshift_gcp_image_prep/tasks/main.yaml new file mode 100644 index 000000000..fee5ab618 --- /dev/null +++ b/roles/openshift_gcp_image_prep/tasks/main.yaml @@ -0,0 +1,18 @@ +--- +# GCE instances are starting with xfs AND barrier=1, which is only for extfs. 
+- name: Remove barrier=1 from XFS fstab entries
+  lineinfile:
+    path: /etc/fstab
+    regexp: '^(.+)xfs(.+?),?barrier=1,?(.*?)$'
+    line: '\1xfs\2 \4'
+    backrefs: yes
+
+- name: Ensure the root filesystem has XFS group quota turned on
+  lineinfile:
+    path: /boot/grub2/grub.cfg
+    regexp: '^(.*)linux16 (.*)$'
+    line: '\1linux16 \2 rootflags=gquota'
+    backrefs: yes
+
+- name: Ensure the root partition grows on startup
+  copy: src=partition.conf dest=/etc/systemd/system/google-instance-setup.service.d/
diff --git a/roles/openshift_master/meta/main.yml b/roles/openshift_master/meta/main.yml
index b0237141b..a657668a9 100644
--- a/roles/openshift_master/meta/main.yml
+++ b/roles/openshift_master/meta/main.yml
@@ -14,19 +14,3 @@ galaxy_info:
 dependencies:
 - role: lib_openshift
 - role: lib_os_firewall
-- role: openshift_master_facts
-- role: openshift_hosted_facts
-- role: openshift_master_certificates
-- role: openshift_etcd_client_certificates
-  etcd_cert_subdir: "openshift-master-{{ openshift.common.hostname }}"
-  etcd_cert_config_dir: "{{ openshift.common.config_base }}/master"
-  etcd_cert_prefix: "master.etcd-"
-  when: groups.oo_etcd_to_config | default([]) | length != 0
-- role: openshift_clock
-- role: openshift_cloud_provider
-- role: openshift_builddefaults
-- role: openshift_buildoverrides
-- role: nickhammond.logrotate
-- role: contiv
-  contiv_role: netmaster
-  when: openshift_use_contiv | default(False) | bool
diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml
index 18d6a1645..5aa8aecec 100644
--- a/roles/openshift_prometheus/defaults/main.yaml
+++ b/roles/openshift_prometheus/defaults/main.yaml
@@ -11,7 +11,7 @@ openshift_prometheus_node_selector: {"region":"infra"}
 openshift_prometheus_image_proxy: "openshift/oauth-proxy:v1.0.0"
 openshift_prometheus_image_prometheus: "openshift/prometheus:v2.0.0-dev"
 openshift_prometheus_image_alertmanager: "openshift/prometheus-alertmanager:dev"
-openshift_prometheus_image_alertbuffer: "ilackarms/message-buffer"
+openshift_prometheus_image_alertbuffer: "openshift/prometheus-alert-buffer:v0.0.1"
 
 # additional prometheus rules file
 openshift_prometheus_additional_rules_file: null
diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml
index 93bdda3e8..a9bce2fb1 100644
--- a/roles/openshift_prometheus/tasks/install_prometheus.yaml
+++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml
@@ -107,7 +107,10 @@
 - name: annotate prometheus service
   command: >
     {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    service prometheus 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls'
+    service prometheus
+    prometheus.io/scrape='true'
+    prometheus.io/scheme=https
+    service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls
 
 - name: annotate alerts service
   command: >
diff --git a/roles/openshift_sanitize_inventory/tasks/main.yml b/roles/openshift_sanitize_inventory/tasks/main.yml
index 59ce505d3..47d7be05a 100644
--- a/roles/openshift_sanitize_inventory/tasks/main.yml
+++ b/roles/openshift_sanitize_inventory/tasks/main.yml
@@ -12,6 +12,27 @@
       deployment_type is deprecated in favor of openshift_deployment_type.
       Please specify only openshift_deployment_type, or make both the same.
 
+# osm_cluster_network_cidr, osm_host_subnet_length and openshift_portal_net are
+# now required to avoid changes that may occur between releases
+#
+# Note: We will skip these checks when some tests run which don't
+# actually do any insalling/upgrading/scaling/etc..
+# Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1451023
+- when:
+  - not testing_skip_some_requirements|default(False)|bool
+  assert:
+    that:
+    - "osm_cluster_network_cidr is defined"
+    - "osm_host_subnet_length is defined"
+    - "openshift_portal_net is defined"
+    msg: >
+      osm_cluster_network_cidr, osm_host_subnet_length, and openshift_portal_net are required inventory
+      variables. If you are upgrading or scaling up these variables should match what is currently used
+      in the cluster. If you don't remember what these values are you can find them in
+      /etc/origin/master/master-config.yaml on a master with the names clusterNetworkCIDR
+      (osm_cluster_network_cidr), hostSubnetLength (osm_host_subnet_length),
+      and serviceNetworkCIDR (openshift_portal_net).
+
 - name: Standardize on latest variable names
   set_fact:
     # goal is to deprecate deployment_type in favor of openshift_deployment_type.
diff --git a/test/integration/openshift_health_checker/common.go b/test/integration/openshift_health_checker/common.go
index a92d6861d..8b79c48cb 100644
--- a/test/integration/openshift_health_checker/common.go
+++ b/test/integration/openshift_health_checker/common.go
@@ -25,7 +25,7 @@ func (p PlaybookTest) Run(t *testing.T) {
 	// A PlaybookTest is intended to be run in parallel with other tests.
 	t.Parallel()
 
-	cmd := exec.Command("ansible-playbook", "-i", "/dev/null", p.Path)
+	cmd := exec.Command("ansible-playbook", "-e", "testing_skip_some_requirements=1", "-i", "/dev/null", p.Path)
 	cmd.Env = append(os.Environ(), "ANSIBLE_FORCE_COLOR=1")
 	b, err := cmd.CombinedOutput()
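The other large piece of this changeset is the new GCP provisioning path: playbooks/gcp/openshift-cluster/provision.yml runs the openshift_gcp role (templated gcloud/bash for the DNS zone, network, firewall rules, load balancers, instance groups and the registry bucket), then normalizes groups and runs the standard cluster config. The diff carries no defaults file for the role, so the exact required variable set is an assumption here; the names below are simply those referenced by the role's templates, and every value is a placeholder. A rough invocation sketch, alongside an inventory that satisfies the network-variable checks added above, might look like:

    ansible-playbook playbooks/gcp/openshift-cluster/provision.yml \
        -e gce_project_id=my-project \
        -e gce_region_name=us-central1 \
        -e gce_zone_name=us-central1-a \
        -e gce_network_name=my-network \
        -e provision_prefix=mycluster- \
        -e public_hosted_zone=example.com \
        -e openshift_master_cluster_public_hostname=console.example.com \
        -e openshift_master_cluster_hostname=internal-master.example.com \
        -e wildcard_zone=apps.example.com

provision_gce_node_groups (the list describing the instance templates and managed instance groups), the registered image, and the SSH key settings would also need to be supplied, presumably via group_vars rather than -e. Within the role, state defaults to 'present'; when it is 'absent' the tasks instead template and run remove.j2.sh to tear the same resources down.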