diff options
35 files changed, 688 insertions, 143 deletions
diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 49f88fa20..f09c3d255 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -84,6 +84,13 @@ openshift_release=v3.6 # See: https://godoc.org/github.com/openshift/origin/pkg/cmd/server/api#ImagePolicyConfig #openshift_master_image_policy_config={"maxImagesBulkImportedPerRepository": 3, "disableScheduledImport": true} +# Configure master API rate limits for external clients +#openshift_master_external_ratelimit_qps=200 +#openshift_master_external_ratelimit_burst=400 +# Configure master API rate limits for loopback clients +#openshift_master_loopback_ratelimit_qps=300 +#openshift_master_loopback_ratelimit_burst=600 + # Docker Configuration # Add additional, insecure, and blocked registries to global docker configuration # For enterprise deployment types we ensure that registry.access.redhat.com is diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index ec18cb8ff..c4b5da5b8 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -84,6 +84,13 @@ openshift_release=v3.6 # See: https://godoc.org/github.com/openshift/origin/pkg/cmd/server/api#ImagePolicyConfig #openshift_master_image_policy_config={"maxImagesBulkImportedPerRepository": 3, "disableScheduledImport": true} +# Configure master API rate limits for external clients +#openshift_master_external_ratelimit_qps=200 +#openshift_master_external_ratelimit_burst=400 +# Configure master API rate limits for loopback clients +#openshift_master_loopback_ratelimit_qps=300 +#openshift_master_loopback_ratelimit_burst=600 + # Docker Configuration # Add additional, insecure, and blocked registries to global docker configuration # For enterprise deployment types we ensure that registry.access.redhat.com is diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index acf5469bf..c0978c6f6 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -3,10 +3,6 @@ tags: - always -- include: ../../common/openshift-cluster/std_include.yml - tags: - - always - - include: ../../common/openshift-cluster/config.yml vars: openshift_cluster_id: "{{ cluster_id | default('default') }}" diff --git a/playbooks/common/openshift-cluster/config.yml b/playbooks/common/openshift-cluster/config.yml index 31c4b04af..7136f1c1f 100644 --- a/playbooks/common/openshift-cluster/config.yml +++ b/playbooks/common/openshift-cluster/config.yml @@ -1,4 +1,8 @@ --- +- include: std_include.yml + tags: + - always + # TODO: refactor this into its own include # and pass a variable for ctx - name: Verify Requirements diff --git a/playbooks/common/openshift-cluster/openshift_logging.yml b/playbooks/common/openshift-cluster/openshift_logging.yml index 57580406c..c5f0c406a 100644 --- a/playbooks/common/openshift-cluster/openshift_logging.yml +++ b/playbooks/common/openshift-cluster/openshift_logging.yml @@ -1,5 +1,5 @@ --- -- include: evaluate_groups.yml +- include: std_include.yml - name: OpenShift Aggregated Logging hosts: oo_first_master diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index 5de03951c..b30450def 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -5,6 +5,19 @@ t_oo_option_master_debug_level: "{{ lookup('oo_option', 'openshift_master_debug_level') }}" pre_tasks: + # Per https://bugzilla.redhat.com/show_bug.cgi?id=1469336 + # + # When scaling up a cluster upgraded from OCP <= 3.5, ensure that + # OPENSHIFT_DEFAULT_REGISTRY is present as defined on the existing + # masters, or absent if such is the case. + - name: Detect if this host is a new master in a scale up + set_fact: + g_openshift_master_is_scaleup: "{{ openshift.common.hostname in ( groups['new_masters'] | default([]) ) }}" + + - name: Scaleup Detection + debug: + var: g_openshift_master_is_scaleup + - name: Check for RPM generated config marker file .config_managed stat: path: /etc/origin/.config_managed @@ -69,7 +82,7 @@ ha: "{{ openshift_master_ha | default(groups.oo_masters | length > 1) }}" master_count: "{{ openshift_master_count | default(groups.oo_masters | length) }}" -- name: Inspect state of first master session secrets and config +- name: Inspect state of first master config settings hosts: oo_first_master roles: - role: openshift_facts @@ -98,6 +111,42 @@ set_fact: l_etcd3_enabled: "{{ etcd3_grep.rc == 0 | bool }}" + - name: Check if atomic-openshift-master sysconfig exists yet + stat: + path: /etc/sysconfig/atomic-openshift-master + register: l_aom_exists + + - name: Preserve OPENSHIFT_DEFAULT_REGISTRY master parameter if present + command: awk '/^OPENSHIFT_DEFAULT_REGISTRY/' /etc/sysconfig/atomic-openshift-master + register: l_default_registry_defined + when: l_aom_exists.stat.exists | bool + + - name: Check if atomic-openshift-master-api sysconfig exists yet + stat: + path: /etc/sysconfig/atomic-openshift-master-api + register: l_aom_api_exists + + - name: Preserve OPENSHIFT_DEFAULT_REGISTRY master-api parameter if present + command: awk '/^OPENSHIFT_DEFAULT_REGISTRY/' /etc/sysconfig/atomic-openshift-master-api + register: l_default_registry_defined_api + when: l_aom_api_exists.stat.exists | bool + + - name: Check if atomic-openshift-master-controllers sysconfig exists yet + stat: + path: /etc/sysconfig/atomic-openshift-master-controllers + register: l_aom_controllers_exists + + - name: Preserve OPENSHIFT_DEFAULT_REGISTRY master-controllers parameter if present + command: awk '/^OPENSHIFT_DEFAULT_REGISTRY/' /etc/sysconfig/atomic-openshift-master-controllers + register: l_default_registry_defined_controllers + when: l_aom_controllers_exists.stat.exists | bool + + - name: Update facts with OPENSHIFT_DEFAULT_REGISTRY value + set_fact: + l_default_registry_value: "{{ l_default_registry_defined.stdout | default('') }}" + l_default_registry_value_api: "{{ l_default_registry_defined_api.stdout | default('') }}" + l_default_registry_value_controllers: "{{ l_default_registry_defined_controllers.stdout | default('') }}" + - name: Generate master session secrets hosts: oo_first_master vars: @@ -145,6 +194,10 @@ etcd_cert_prefix: "master.etcd-" r_openshift_master_clean_install: "{{ hostvars[groups.oo_first_master.0].l_clean_install }}" r_openshift_master_etcd3_storage: "{{ hostvars[groups.oo_first_master.0].l_etcd3_enabled }}" + openshift_master_is_scaleup_host: "{{ g_openshift_master_is_scaleup | default(false) }}" + openshift_master_default_registry_value: "{{ hostvars[groups.oo_first_master.0].l_default_registry_value }}" + openshift_master_default_registry_value_api: "{{ hostvars[groups.oo_first_master.0].l_default_registry_value_api }}" + openshift_master_default_registry_value_controllers: "{{ hostvars[groups.oo_first_master.0].l_default_registry_value_controllers }}" - role: nuage_master when: openshift.common.use_nuage | bool - role: calico_master diff --git a/roles/calico_master/tasks/main.yml b/roles/calico_master/tasks/main.yml index 8ddca26d6..16d960d8b 100644 --- a/roles/calico_master/tasks/main.yml +++ b/roles/calico_master/tasks/main.yml @@ -4,7 +4,7 @@ path: "{{ item }}" with_items: - "{{ calico_etcd_ca_cert_file }}" - - "{{ calico_etcd_cert_file}}" + - "{{ calico_etcd_cert_file }}" - "{{ calico_etcd_key_file }}" - name: Calico Master | Create temp directory for policy controller definition diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 40a28cde5..85cbc6224 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -105,6 +105,29 @@ class OpenShiftCheck(object): raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys)))) return value + @staticmethod + def get_major_minor_version(openshift_image_tag): + """Parse and return the deployed version of OpenShift as a tuple.""" + if openshift_image_tag and openshift_image_tag[0] == 'v': + openshift_image_tag = openshift_image_tag[1:] + + # map major release versions across releases + # to a common major version + openshift_major_release_version = { + "1": "3", + } + + components = openshift_image_tag.split(".") + if not components or len(components) < 2: + msg = "An invalid version of OpenShift was found for this host: {}" + raise OpenShiftCheckException(msg.format(openshift_image_tag)) + + if components[0] in openshift_major_release_version: + components[0] = openshift_major_release_version[components[0]] + + components = tuple(int(x) for x in components[:2]) + return components + LOADER_EXCLUDES = ( "__init__.py", diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py index f82ae64d7..32d853d57 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/curator.py +++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py @@ -9,11 +9,11 @@ class Curator(LoggingCheck): name = "curator" tags = ["health", "logging"] - logging_namespace = None - def run(self): + """Check various things and gather errors. Returns: result as hash""" + self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - curator_pods, error = super(Curator, self).get_pods_for_component( + curator_pods, error = self.get_pods_for_component( self.logging_namespace, "curator", ) @@ -23,7 +23,6 @@ class Curator(LoggingCheck): if check_error: msg = ("The following Curator deployment issue was found:" - "\n-------\n" "{}".format(check_error)) return {"failed": True, "changed": False, "msg": msg} @@ -39,7 +38,7 @@ class Curator(LoggingCheck): "Is Curator correctly deployed?" ) - not_running = super(Curator, self).not_running_pods(pods) + not_running = self.not_running_pods(pods) if len(not_running) == len(pods): return ( "The Curator pod is not currently in a running state,\n" diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 1e478c04d..8bdda1f32 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -12,13 +12,11 @@ class Elasticsearch(LoggingCheck): name = "elasticsearch" tags = ["health", "logging"] - logging_namespace = None - def run(self): """Check various things and gather errors. Returns: result as hash""" self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - es_pods, error = super(Elasticsearch, self).get_pods_for_component( + es_pods, error = self.get_pods_for_component( self.logging_namespace, "es", ) @@ -28,7 +26,6 @@ class Elasticsearch(LoggingCheck): if check_error: msg = ("The following Elasticsearch deployment issue was found:" - "\n-------\n" "{}".format(check_error)) return {"failed": True, "changed": False, "msg": msg} @@ -37,7 +34,7 @@ class Elasticsearch(LoggingCheck): def _not_running_elasticsearch_pods(self, es_pods): """Returns: list of pods that are not running, list of errors about non-running pods""" - not_running = super(Elasticsearch, self).not_running_pods(es_pods) + not_running = self.not_running_pods(es_pods) if not_running: return not_running, [( 'The following Elasticsearch pods are not running:\n' @@ -78,7 +75,7 @@ class Elasticsearch(LoggingCheck): for pod_name in pods_by_name.keys(): # Compare what each ES node reports as master and compare for split brain get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master") - master_name_str = self._exec_oc(get_master_cmd, []) + master_name_str = self.exec_oc(self.logging_namespace, get_master_cmd, []) master_names = (master_name_str or '').split(' ') if len(master_names) > 1: es_master_names.add(master_names[1]) @@ -111,7 +108,7 @@ class Elasticsearch(LoggingCheck): # get ES cluster nodes node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes') - cluster_node_data = self._exec_oc(node_cmd, []) + cluster_node_data = self.exec_oc(self.logging_namespace, node_cmd, []) try: cluster_nodes = json.loads(cluster_node_data)['nodes'] except (ValueError, KeyError): @@ -138,7 +135,7 @@ class Elasticsearch(LoggingCheck): error_msgs = [] for pod_name in pods_by_name.keys(): cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true') - cluster_health_data = self._exec_oc(cluster_health_cmd, []) + cluster_health_data = self.exec_oc(self.logging_namespace, cluster_health_cmd, []) try: health_res = json.loads(cluster_health_data) if not health_res or not health_res.get('status'): @@ -165,7 +162,7 @@ class Elasticsearch(LoggingCheck): error_msgs = [] for pod_name in pods_by_name.keys(): df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) - disk_output = self._exec_oc(df_cmd, []) + disk_output = self.exec_oc(self.logging_namespace, df_cmd, []) lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$' @@ -201,10 +198,3 @@ class Elasticsearch(LoggingCheck): )) return error_msgs - - def _exec_oc(self, cmd_str, extra_args): - return super(Elasticsearch, self).exec_oc( - self.logging_namespace, - cmd_str, - extra_args, - ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py index 063e707a9..b3485bf44 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py @@ -11,8 +11,6 @@ class Fluentd(LoggingCheck): name = "fluentd" tags = ["health", "logging"] - logging_namespace = None - def run(self): """Check various things and gather errors. Returns: result as hash""" @@ -27,7 +25,6 @@ class Fluentd(LoggingCheck): if check_error: msg = ("The following Fluentd deployment issue was found:" - "\n-------\n" "{}".format(check_error)) return {"failed": True, "changed": False, "msg": msg} @@ -147,7 +144,11 @@ class Fluentd(LoggingCheck): def get_nodes_by_name(self): """Retrieve all the node definitions. Returns: dict(name: node), error string""" - nodes_json = self._exec_oc("get nodes -o json", []) + nodes_json = self.exec_oc( + self.logging_namespace, + "get nodes -o json", + [] + ) try: nodes = json.loads(nodes_json) except ValueError: # no valid json - should not happen @@ -158,10 +159,3 @@ class Fluentd(LoggingCheck): node['metadata']['name']: node for node in nodes['items'] }, None - - def _exec_oc(self, cmd_str, extra_args): - return super(Fluentd, self).exec_oc( - self.logging_namespace, - cmd_str, - extra_args, - ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py new file mode 100644 index 000000000..0970f0a63 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py @@ -0,0 +1,138 @@ +""" +Module for performing checks on a Fluentd logging deployment configuration +""" + +from openshift_checks import OpenShiftCheckException +from openshift_checks.logging.logging import LoggingCheck + + +class FluentdConfig(LoggingCheck): + """Module that checks logging configuration of an integrated logging Fluentd deployment""" + name = "fluentd_config" + tags = ["health"] + + def is_active(self): + logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False) + + try: + version = self.get_major_minor_version(self.get_var("openshift_image_tag")) + except ValueError: + # if failed to parse OpenShift version, perform check anyway (if logging enabled) + return logging_deployed + + return logging_deployed and version < (3, 6) + + def run(self): + """Check that Fluentd has running pods, and that its logging config matches Docker's logging config.""" + self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace) + config_error = self.check_logging_config() + if config_error: + msg = ("The following Fluentd logging configuration problem was found:" + "\n{}".format(config_error)) + return {"failed": True, "msg": msg} + + return {} + + def check_logging_config(self): + """Ensure that the configured Docker logging driver matches fluentd settings. + This means that, at least for now, if the following condition is met: + + openshift_logging_fluentd_use_journal == True + + then the value of the configured Docker logging driver should be "journald". + Otherwise, the value of the Docker logging driver should be "json-file". + Returns an error string if the above condition is not met, or None otherwise.""" + use_journald = self.get_var("openshift_logging_fluentd_use_journal", default=True) + + # if check is running on a master, retrieve all running pods + # and check any pod's container for the env var "USE_JOURNAL" + group_names = self.get_var("group_names") + if "masters" in group_names: + use_journald = self.check_fluentd_env_var() + + docker_info = self.execute_module("docker_info", {}) + try: + logging_driver = docker_info["info"]["LoggingDriver"] + except KeyError: + return "Unable to determine Docker logging driver." + + logging_driver = docker_info["info"]["LoggingDriver"] + recommended_logging_driver = "journald" + error = None + + # If fluentd is set to use journald but Docker is not, recommend setting the `--log-driver` + # option as an inventory file variable, or adding the log driver value as part of the + # Docker configuration in /etc/docker/daemon.json. There is no global --log-driver flag that + # can be passed to the Docker binary; the only other recommendation that can be made, would be + # to pass the `--log-driver` flag to the "run" sub-command of the `docker` binary when running + # individual containers. + if use_journald and logging_driver != "journald": + error = ('Your Fluentd configuration is set to aggregate Docker container logs from "journald".\n' + 'This differs from your Docker configuration, which has been set to use "{driver}" ' + 'as the default method of storing logs.\n' + 'This discrepancy in configuration will prevent Fluentd from receiving any logs' + 'from your Docker containers.').format(driver=logging_driver) + elif not use_journald and logging_driver != "json-file": + recommended_logging_driver = "json-file" + error = ('Your Fluentd configuration is set to aggregate Docker container logs from ' + 'individual json log files per container.\n ' + 'This differs from your Docker configuration, which has been set to use ' + '"{driver}" as the default method of storing logs.\n' + 'This discrepancy in configuration will prevent Fluentd from receiving any logs' + 'from your Docker containers.').format(driver=logging_driver) + + if error: + error += ('\nTo resolve this issue, add the following variable to your Ansible inventory file:\n\n' + ' openshift_docker_options="--log-driver={driver}"\n\n' + 'Alternatively, you can add the following option to your Docker configuration, located in' + '"/etc/docker/daemon.json":\n\n' + '{{ "log-driver": "{driver}" }}\n\n' + 'See https://docs.docker.com/engine/admin/logging/json-file ' + 'for more information.').format(driver=recommended_logging_driver) + + return error + + def check_fluentd_env_var(self): + """Read and return the value of the 'USE_JOURNAL' environment variable on a fluentd pod.""" + running_pods = self.running_fluentd_pods() + + try: + pod_containers = running_pods[0]["spec"]["containers"] + except KeyError: + return "Unable to detect running containers on selected Fluentd pod." + + if not pod_containers: + msg = ('There are no running containers on selected Fluentd pod "{}".\n' + 'Unable to calculate expected logging driver.').format(running_pods[0]["metadata"].get("name", "")) + raise OpenShiftCheckException(msg) + + pod_env = pod_containers[0].get("env") + if not pod_env: + msg = ('There are no environment variables set on the Fluentd container "{}".\n' + 'Unable to calculate expected logging driver.').format(pod_containers[0].get("name")) + raise OpenShiftCheckException(msg) + + for env in pod_env: + if env["name"] == "USE_JOURNAL": + return env.get("value", "false") != "false" + + return False + + def running_fluentd_pods(self): + """Return a list of running fluentd pods.""" + fluentd_pods, error = self.get_pods_for_component( + self.logging_namespace, + "fluentd", + ) + if error: + msg = 'Unable to retrieve any pods for the "fluentd" logging component: {}'.format(error) + raise OpenShiftCheckException(msg) + + running_fluentd_pods = [pod for pod in fluentd_pods if pod['status']['phase'] == 'Running'] + if not running_fluentd_pods: + msg = ('No Fluentd pods were found to be in the "Running" state. ' + 'At least one Fluentd pod is required in order to perform this check.') + + raise OpenShiftCheckException(msg) + + return running_fluentd_pods diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index 60f94e106..efb14ab42 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -21,13 +21,11 @@ class Kibana(LoggingCheck): name = "kibana" tags = ["health", "logging"] - logging_namespace = None - def run(self): """Check various things and gather errors. Returns: result as hash""" self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging") - kibana_pods, error = super(Kibana, self).get_pods_for_component( + kibana_pods, error = self.get_pods_for_component( self.logging_namespace, "kibana", ) @@ -40,7 +38,6 @@ class Kibana(LoggingCheck): if check_error: msg = ("The following Kibana deployment issue was found:" - "\n-------\n" "{}".format(check_error)) return {"failed": True, "changed": False, "msg": msg} @@ -118,7 +115,11 @@ class Kibana(LoggingCheck): """ # Get logging url - get_route = self._exec_oc("get route logging-kibana -o json", []) + get_route = self.exec_oc( + self.logging_namespace, + "get route logging-kibana -o json", + [], + ) if not get_route: return None, 'no_route_exists' @@ -217,10 +218,3 @@ class Kibana(LoggingCheck): ).format(error=error) return error return None - - def _exec_oc(self, cmd_str, extra_args): - return super(Kibana, self).exec_oc( - self.logging_namespace, - cmd_str, - extra_args, - ) diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index cd6ebd493..d5e55bc25 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -21,12 +21,6 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): "3.4": "2.4", } - # map major release versions across releases - # to a common major version - openshift_major_release_version = { - "1": "3", - } - def is_active(self): """Skip hosts that do not have package requirements.""" group_names = self.get_var("group_names", default=[]) @@ -46,32 +40,15 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): def get_required_ovs_version(self): """Return the correct Open vSwitch version for the current OpenShift version""" - openshift_version = self._get_openshift_version() + openshift_version_tuple = self.get_major_minor_version(self.get_var("openshift_image_tag")) - if float(openshift_version) < 3.5: + if openshift_version_tuple < (3, 5): return self.openshift_to_ovs_version["3.4"] - ovs_version = self.openshift_to_ovs_version.get(str(openshift_version)) + openshift_version = ".".join(str(x) for x in openshift_version_tuple) + ovs_version = self.openshift_to_ovs_version.get(openshift_version) if ovs_version: - return self.openshift_to_ovs_version[str(openshift_version)] + return self.openshift_to_ovs_version[openshift_version] msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" raise OpenShiftCheckException(msg.format(openshift_version)) - - def _get_openshift_version(self): - openshift_version = self.get_var("openshift_image_tag") - if openshift_version and openshift_version[0] == 'v': - openshift_version = openshift_version[1:] - - return self._parse_version(openshift_version) - - def _parse_version(self, version): - components = version.split(".") - if not components or len(components) < 2: - msg = "An invalid version of OpenShift was found for this host: {}" - raise OpenShiftCheckException(msg.format(version)) - - if components[0] in self.openshift_major_release_version: - components[0] = self.openshift_major_release_version[components[0]] - - return '.'.join(components[:2]) diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py index 9edfc17c7..67408609a 100644 --- a/roles/openshift_health_checker/test/elasticsearch_test.py +++ b/roles/openshift_health_checker/test/elasticsearch_test.py @@ -6,14 +6,6 @@ from openshift_checks.logging.elasticsearch import Elasticsearch task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) -def canned_elasticsearch(task_vars=None, exec_oc=None): - """Create an Elasticsearch check object with canned exec_oc method""" - check = Elasticsearch("dummy", task_vars or {}) # fails if a module is actually invoked - if exec_oc: - check._exec_oc = exec_oc - return check - - def assert_error(error, expect_error): if expect_error: assert error @@ -50,10 +42,10 @@ split_es_pod = { def test_check_elasticsearch(): - assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([]) + assert 'No logging Elasticsearch pods' in Elasticsearch().check_elasticsearch([]) # canned oc responses to match so all the checks pass - def _exec_oc(cmd, args): + def _exec_oc(ns, cmd, args): if '_cat/master' in cmd: return 'name logging-es' elif '/_nodes' in cmd: @@ -65,7 +57,9 @@ def test_check_elasticsearch(): else: raise Exception(cmd) - assert not canned_elasticsearch({}, _exec_oc).check_elasticsearch([plain_es_pod]) + check = Elasticsearch(None, {}) + check.exec_oc = _exec_oc + assert not check.check_elasticsearch([plain_es_pod]) def pods_by_name(pods): @@ -88,8 +82,8 @@ def pods_by_name(pods): ]) def test_check_elasticsearch_masters(pods, expect_error): test_pods = list(pods) - check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: test_pods.pop(0)['_test_master_name_str']) - + check = Elasticsearch(None, task_vars_config_base) + check.execute_module = lambda cmd, args: {'result': test_pods.pop(0)['_test_master_name_str']} errors = check._check_elasticsearch_masters(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -124,7 +118,8 @@ es_node_list = { ), ]) def test_check_elasticsearch_node_list(pods, node_list, expect_error): - check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list)) + check = Elasticsearch(None, task_vars_config_base) + check.execute_module = lambda cmd, args: {'result': json.dumps(node_list)} errors = check._check_elasticsearch_node_list(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -149,7 +144,8 @@ def test_check_elasticsearch_node_list(pods, node_list, expect_error): ]) def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): test_health_data = list(health_data) - check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(test_health_data.pop(0))) + check = Elasticsearch(None, task_vars_config_base) + check.execute_module = lambda cmd, args: {'result': json.dumps(test_health_data.pop(0))} errors = check._check_es_cluster_health(pods_by_name(pods)) assert_error(''.join(errors), expect_error) @@ -174,7 +170,8 @@ def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): ), ]) def test_check_elasticsearch_diskspace(disk_data, expect_error): - check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: disk_data) + check = Elasticsearch(None, task_vars_config_base) + check.execute_module = lambda cmd, args: {'result': disk_data} errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod])) assert_error(''.join(errors), expect_error) diff --git a/roles/openshift_health_checker/test/fluentd_config_test.py b/roles/openshift_health_checker/test/fluentd_config_test.py new file mode 100644 index 000000000..8a2d8b72b --- /dev/null +++ b/roles/openshift_health_checker/test/fluentd_config_test.py @@ -0,0 +1,357 @@ +import pytest + +from openshift_checks.logging.fluentd_config import FluentdConfig, OpenShiftCheckException + + +def canned_fluentd_pod(containers): + return { + "metadata": { + "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, + "name": "logging-fluentd-1", + }, + "spec": { + "host": "node1", + "nodeName": "node1", + "containers": containers, + }, + "status": { + "phase": "Running", + "containerStatuses": [{"ready": True}], + "conditions": [{"status": "True", "type": "Ready"}], + } + } + + +fluentd_pod = { + "metadata": { + "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, + "name": "logging-fluentd-1", + }, + "spec": { + "host": "node1", + "nodeName": "node1", + "containers": [ + { + "name": "container1", + "env": [ + { + "name": "USE_JOURNAL", + "value": "true", + } + ], + } + ], + }, + "status": { + "phase": "Running", + "containerStatuses": [{"ready": True}], + "conditions": [{"status": "True", "type": "Ready"}], + } +} + +not_running_fluentd_pod = { + "metadata": { + "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, + "name": "logging-fluentd-2", + }, + "status": { + "phase": "Unknown", + "containerStatuses": [{"ready": True}, {"ready": False}], + "conditions": [{"status": "True", "type": "Ready"}], + } +} + + +@pytest.mark.parametrize('name, use_journald, logging_driver, extra_words', [ + ( + 'test success with use_journald=false, and docker config set to use "json-file"', + False, + "json-file", + [], + ), +], ids=lambda argvals: argvals[0]) +def test_check_logging_config_non_master(name, use_journald, logging_driver, extra_words): + def execute_module(module_name, args): + if module_name == "docker_info": + return { + "info": { + "LoggingDriver": logging_driver, + } + } + + return {} + + task_vars = dict( + group_names=["nodes", "etcd"], + openshift_logging_fluentd_use_journal=use_journald, + openshift=dict( + common=dict(config_base=""), + ), + ) + + check = FluentdConfig(execute_module, task_vars) + check.execute_module = execute_module + error = check.check_logging_config() + + assert error is None + + +@pytest.mark.parametrize('name, use_journald, logging_driver, words', [ + ( + 'test failure with use_journald=false, but docker config set to use "journald"', + False, + "journald", + ['json log files', 'has been set to use "journald"'], + ), + ( + 'test failure with use_journald=false, but docker config set to use an "unsupported" driver', + False, + "unsupported", + ["json log files", 'has been set to use "unsupported"'], + ), + ( + 'test failure with use_journald=true, but docker config set to use "json-file"', + True, + "json-file", + ['logs from "journald"', 'has been set to use "json-file"'], + ), +], ids=lambda argvals: argvals[0]) +def test_check_logging_config_non_master_failed(name, use_journald, logging_driver, words): + def execute_module(module_name, args): + if module_name == "docker_info": + return { + "info": { + "LoggingDriver": logging_driver, + } + } + + return {} + + task_vars = dict( + group_names=["nodes", "etcd"], + openshift_logging_fluentd_use_journal=use_journald, + openshift=dict( + common=dict(config_base=""), + ), + ) + + check = FluentdConfig(execute_module, task_vars) + check.execute_module = execute_module + error = check.check_logging_config() + + assert error is not None + for word in words: + assert word in error + + +@pytest.mark.parametrize('name, pods, logging_driver, extra_words', [ + # use_journald returns false (not using journald), but check succeeds + # since docker is set to use json-file + ( + 'test success with use_journald=false, and docker config set to use default driver "json-file"', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [{ + "name": "USE_JOURNAL", + "value": "false", + }], + }, + ] + )], + "json-file", + [], + ), + ( + 'test success with USE_JOURNAL env var missing and docker config set to use default driver "json-file"', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [{ + "name": "RANDOM", + "value": "value", + }], + }, + ] + )], + "json-file", + [], + ), +], ids=lambda argvals: argvals[0]) +def test_check_logging_config_master(name, pods, logging_driver, extra_words): + def execute_module(module_name, args): + if module_name == "docker_info": + return { + "info": { + "LoggingDriver": logging_driver, + } + } + + return {} + + task_vars = dict( + group_names=["masters"], + openshift=dict( + common=dict(config_base=""), + ), + ) + + def get_pods(namespace, logging_component): + return pods, None + + check = FluentdConfig(execute_module, task_vars) + check.execute_module = execute_module + check.get_pods_for_component = get_pods + error = check.check_logging_config() + + assert error is None + + +@pytest.mark.parametrize('name, pods, logging_driver, words', [ + ( + 'test failure with use_journald=false, but docker config set to use "journald"', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [{ + "name": "USE_JOURNAL", + "value": "false", + }], + }, + ] + )], + "journald", + ['json log files', 'has been set to use "journald"'], + ), + ( + 'test failure with use_journald=true, but docker config set to use "json-file"', + [fluentd_pod], + "json-file", + ['logs from "journald"', 'has been set to use "json-file"'], + ), + ( + 'test failure with use_journald=false, but docker set to use an "unsupported" driver', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [{ + "name": "USE_JOURNAL", + "value": "false", + }], + }, + ] + )], + "unsupported", + ["json log files", 'has been set to use "unsupported"'], + ), + ( + 'test failure with USE_JOURNAL env var missing and docker config set to use "journald"', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [{ + "name": "RANDOM", + "value": "value", + }], + }, + ] + )], + "journald", + ["configuration is set to", "json log files"], + ), +], ids=lambda argvals: argvals[0]) +def test_check_logging_config_master_failed(name, pods, logging_driver, words): + def execute_module(module_name, args): + if module_name == "docker_info": + return { + "info": { + "LoggingDriver": logging_driver, + } + } + + return {} + + task_vars = dict( + group_names=["masters"], + openshift=dict( + common=dict(config_base=""), + ), + ) + + def get_pods(namespace, logging_component): + return pods, None + + check = FluentdConfig(execute_module, task_vars) + check.execute_module = execute_module + check.get_pods_for_component = get_pods + error = check.check_logging_config() + + assert error is not None + for word in words: + assert word in error + + +@pytest.mark.parametrize('name, pods, response, logging_driver, extra_words', [ + ( + 'test OpenShiftCheckException with no running containers', + [canned_fluentd_pod([])], + { + "failed": True, + "result": "unexpected", + }, + "json-file", + ['no running containers'], + ), + ( + 'test OpenShiftCheckException one container and no env vars set', + [canned_fluentd_pod( + [ + { + "name": "container1", + "env": [], + }, + ] + )], + { + "failed": True, + "result": "unexpected", + }, + "json-file", + ['no environment variables'], + ), +], ids=lambda argvals: argvals[0]) +def test_check_logging_config_master_fails_on_unscheduled_deployment(name, pods, response, logging_driver, extra_words): + def execute_module(module_name, args): + if module_name == "docker_info": + return { + "info": { + "LoggingDriver": logging_driver, + } + } + + return {} + + task_vars = dict( + group_names=["masters"], + openshift=dict( + common=dict(config_base=""), + ), + ) + + def get_pods(namespace, logging_component): + return pods, None + + check = FluentdConfig(execute_module, task_vars) + check.get_pods_for_component = get_pods + + with pytest.raises(OpenShiftCheckException) as error: + check.check_logging_config() + + assert error is not None + for word in extra_words: + assert word in str(error) diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py index 9cee57868..a84d89cef 100644 --- a/roles/openshift_health_checker/test/fluentd_test.py +++ b/roles/openshift_health_checker/test/fluentd_test.py @@ -4,14 +4,6 @@ import json from openshift_checks.logging.fluentd import Fluentd -def canned_fluentd(exec_oc=None): - """Create a Fluentd check object with canned exec_oc method""" - check = Fluentd("dummy") # fails if a module is actually invoked - if exec_oc: - check._exec_oc = exec_oc - return check - - def assert_error(error, expect_error): if expect_error: assert error @@ -103,7 +95,7 @@ fluentd_node3_unlabeled = { ), ]) def test_get_fluentd_pods(pods, nodes, expect_error): - check = canned_fluentd(exec_oc=lambda cmd, args: json.dumps(dict(items=nodes))) - + check = Fluentd() + check.exec_oc = lambda ns, cmd, args: json.dumps(dict(items=nodes)) error = check.check_fluentd(pods) assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py index 3a880d300..0bf492511 100644 --- a/roles/openshift_health_checker/test/kibana_test.py +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -11,14 +11,6 @@ except ImportError: from openshift_checks.logging.kibana import Kibana -def canned_kibana(exec_oc=None): - """Create a Kibana check object with canned exec_oc method""" - check = Kibana() # fails if a module is actually invoked - if exec_oc: - check._exec_oc = exec_oc - return check - - def assert_error(error, expect_error): if expect_error: assert error @@ -68,7 +60,7 @@ not_running_kibana_pod = { ), ]) def test_check_kibana(pods, expect_error): - check = canned_kibana() + check = Kibana() error = check.check_kibana(pods) assert_error(error, expect_error) @@ -137,7 +129,8 @@ def test_check_kibana(pods, expect_error): ), ]) def test_get_kibana_url(route, expect_url, expect_error): - check = canned_kibana(exec_oc=lambda cmd, args: json.dumps(route) if route else "") + check = Kibana() + check.exec_oc = lambda ns, cmd, args: json.dumps(route) if route else "" url, error = check._get_kibana_url() if expect_url: @@ -210,7 +203,7 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch): raise lib_result monkeypatch.setattr(urllib2, 'urlopen', urlopen) - check = canned_kibana() + check = Kibana() check._get_kibana_url = lambda: ('url', None) check._verify_url_internal = lambda url: None diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 97650e2ce..84ead3548 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -135,16 +135,23 @@ Elasticsearch OPS too, if using an OPS cluster: secure_forward forwarder for the node agent Fluentd daemonsets running in the cluster. This can be used to reduce the number of connections to the OpenShift API server, by using `mux` and configuring each node Fluentd to - send raw logs to mux and turn off the k8s metadata plugin. + send raw logs to mux and turn off the k8s metadata plugin. This requires the + use of `openshift_logging_mux_client_mode` (see below). - `openshift_logging_mux_allow_external`: Default `False`. If this is `True`, the `mux` service will be deployed, and it will be configured to allow Fluentd clients running outside of the cluster to send logs using secure_forward. This allows OpenShift logging to be used as a central logging service for clients other than OpenShift, or other OpenShift clusters. -- `openshift_logging_use_mux_client`: Default `False`. If this is `True`, the - node agent Fluentd services will be configured to send logs to the mux - service rather than directly to Elasticsearch. +- `openshift_logging_mux_client_mode`: Values - `minimal`, `maximal`. + Default is unset. Setting this value will cause the Fluentd node agent to + send logs to mux rather than directly to Elasticsearch. The value + `maximal` means that Fluentd will do as much processing as possible at the + node before sending the records to mux. This is the current recommended + way to use mux due to current scaling issues. + The value `minimal` means that Fluentd will do *no* processing at all, and + send the raw logs to mux for processing. We do not currently recommend using + this mode, and ansible will warn you about this. - `openshift_logging_mux_hostname`: Default is "mux." + `openshift_master_default_subdomain`. This is the hostname *external*_ clients will use to connect to mux, and will be used in the TLS server cert diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 1c243f934..8b0f4cb62 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -157,8 +157,6 @@ openshift_logging_storage_access_modes: "{{ openshift_hosted_logging_storage_acc # mux - secure_forward listener service openshift_logging_mux_allow_external: False openshift_logging_use_mux: "{{ openshift_logging_mux_allow_external | default(False) }}" -# this tells the fluentd node agent to use mux instead of sending directly to Elasticsearch -openshift_logging_use_mux_client: False openshift_logging_mux_hostname: "{{ 'mux.' ~ (openshift_master_default_subdomain | default('router.default.svc.cluster.local', true)) }}" openshift_logging_mux_port: 24284 openshift_logging_mux_cpu_limit: 500m diff --git a/roles/openshift_logging/vars/openshift-enterprise.yml b/roles/openshift_logging/vars/openshift-enterprise.yml index 92e68a0a3..49e8a18af 100644 --- a/roles/openshift_logging/vars/openshift-enterprise.yml +++ b/roles/openshift_logging/vars/openshift-enterprise.yml @@ -1,3 +1,3 @@ --- __openshift_logging_image_prefix: "{{ openshift_hosted_logging_deployer_prefix | default('registry.access.redhat.com/openshift3/') }}" -__openshift_logging_image_version: "{{ openshift_hosted_logging_deployer_version | default ('3.6.0') }}" +__openshift_logging_image_version: "{{ openshift_hosted_logging_deployer_version | default ('v3.6') }}" diff --git a/roles/openshift_logging_fluentd/defaults/main.yml b/roles/openshift_logging_fluentd/defaults/main.yml index be9943b0d..a53bbd2df 100644 --- a/roles/openshift_logging_fluentd/defaults/main.yml +++ b/roles/openshift_logging_fluentd/defaults/main.yml @@ -48,7 +48,6 @@ openshift_logging_fluentd_aggregating_strict: "no" openshift_logging_fluentd_aggregating_cert_path: none openshift_logging_fluentd_aggregating_key_path: none openshift_logging_fluentd_aggregating_passphrase: none -openshift_logging_use_mux_client: False ### Deprecating in 3.6 openshift_logging_fluentd_es_copy: false diff --git a/roles/openshift_logging_fluentd/tasks/main.yaml b/roles/openshift_logging_fluentd/tasks/main.yaml index 55de2ae8d..9dfc6fc86 100644 --- a/roles/openshift_logging_fluentd/tasks/main.yaml +++ b/roles/openshift_logging_fluentd/tasks/main.yaml @@ -23,6 +23,14 @@ msg: openshift_hosted_logging_use_journal is deprecated. Fluentd will automatically detect which logging driver is being used. when: openshift_hosted_logging_use_journal is defined +- fail: + msg: Invalid openshift_logging_mux_client_mode [{{ openshift_logging_mux_client_mode }}], one of {{ __allowed_mux_client_modes }} allowed + when: openshift_logging_mux_client_mode is defined and not openshift_logging_mux_client_mode in __allowed_mux_client_modes + +- debug: + msg: WARNING Use of openshift_logging_mux_client_mode=minimal is not recommended due to current scaling issues + when: openshift_logging_mux_client_mode is defined and openshift_logging_mux_client_mode == 'minimal' + - include: determine_version.yaml # allow passing in a tempdir diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index a4cf9a149..39dffba19 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -64,7 +64,7 @@ spec: readOnly: true - name: filebufferstorage mountPath: /var/lib/fluentd -{% if openshift_logging_use_mux_client | bool %} +{% if openshift_logging_mux_client_mode is defined %} - name: muxcerts mountPath: /etc/fluent/muxkeys readOnly: true @@ -112,10 +112,12 @@ spec: resourceFieldRef: containerName: "{{ daemonset_container_name }}" resource: limits.memory - - name: "USE_MUX_CLIENT" - value: "{{ openshift_logging_use_mux_client | default('false') | lower }}" - name: "FILE_BUFFER_LIMIT" value: "{{ openshift_logging_fluentd_file_buffer_limit | default('1Gi') }}" +{% if openshift_logging_mux_client_mode is defined %} + - name: "MUX_CLIENT_MODE" + value: "{{ openshift_logging_mux_client_mode }}" +{% endif %} volumes: - name: runlogjournal hostPath: @@ -144,7 +146,7 @@ spec: - name: dockerdaemoncfg hostPath: path: /etc/docker -{% if openshift_logging_use_mux_client | bool %} +{% if openshift_logging_mux_client_mode is defined %} - name: muxcerts secret: secretName: logging-mux diff --git a/roles/openshift_logging_fluentd/vars/main.yml b/roles/openshift_logging_fluentd/vars/main.yml index ad3fb0bdd..ec8e565c3 100644 --- a/roles/openshift_logging_fluentd/vars/main.yml +++ b/roles/openshift_logging_fluentd/vars/main.yml @@ -2,3 +2,4 @@ __latest_fluentd_version: "3_5" __allowed_fluentd_versions: ["3_5", "3_6"] __allowed_fluentd_types: ["hosted", "secure-aggregator", "secure-host"] +__allowed_mux_client_modes: ["minimal", "maximal"] diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 35fc7146f..7a3da9b4c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -28,6 +28,7 @@ openshift_logging_mux_journal_source: "{{ openshift_hosted_logging_journal_sourc openshift_logging_mux_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}" openshift_logging_mux_allow_external: False +openshift_logging_use_mux: "{{ openshift_logging_mux_allow_external | default(False) }}" openshift_logging_mux_hostname: "{{ 'mux.' ~ (openshift_master_default_subdomain | default('router.default.svc.cluster.local', true)) }}" openshift_logging_mux_port: 24284 # the namespace to use for undefined projects should come first, followed by any diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index e43d9d397..70afe5cee 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -101,8 +101,6 @@ spec: value: "{{ openshift_logging_mux_port }}" - name: USE_MUX value: "true" - - name: MUX_ALLOW_EXTERNAL - value: "{{ openshift_logging_mux_allow_external | default('false') | lower }}" - name: "BUFFER_QUEUE_LIMIT" value: "{{ openshift_logging_mux_buffer_queue_limit }}" - name: "BUFFER_SIZE_LIMIT" diff --git a/roles/openshift_master/tasks/systemd_units.yml b/roles/openshift_master/tasks/systemd_units.yml index dfc255b3d..d71ad3459 100644 --- a/roles/openshift_master/tasks/systemd_units.yml +++ b/roles/openshift_master/tasks/systemd_units.yml @@ -23,7 +23,7 @@ when: openshift.common.is_containerized | bool and not openshift.common.is_master_system_container | bool # workaround for missing systemd unit files -- name: Create the systemd unit files +- name: "Create the {{ openshift.common.service_type }} systemd unit file" template: src: "master_docker/master.docker.service.j2" dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master.service" @@ -32,7 +32,7 @@ - not openshift.common.is_master_system_container | bool register: create_master_unit_file -- name: Install Master service file +- name: "Install {{ openshift.common.service_type }} systemd unit file" copy: dest: "/etc/systemd/system/{{ openshift.common.service_type }}-master.service" src: "{{ openshift.common.service_type }}-master.service" @@ -44,7 +44,7 @@ - command: systemctl daemon-reload when: create_master_unit_file | changed -- name: Create the ha systemd unit files +- name: Create the ha systemd unit files for api and controller services template: src: "{{ ha_svc_template_path }}/atomic-openshift-master-{{ item }}.service.j2" dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master-{{ item }}.service" diff --git a/roles/openshift_master/templates/atomic-openshift-master.j2 b/roles/openshift_master/templates/atomic-openshift-master.j2 index 850fae0e4..b931f1414 100644 --- a/roles/openshift_master/templates/atomic-openshift-master.j2 +++ b/roles/openshift_master/templates/atomic-openshift-master.j2 @@ -1,6 +1,9 @@ OPTIONS=--loglevel={{ openshift.master.debug_level | default(2) }} CONFIG_FILE={{ openshift_master_config_file }} -{% if openshift_push_via_dns | default(false) %} +{# Preserve existing OPENSHIFT_DEFAULT_REGISTRY settings in scale up runs #} +{% if openshift_master_is_scaleup_host %} +{{ openshift_master_default_registry_value }} +{% elif openshift_push_via_dns | default(false) %} OPENSHIFT_DEFAULT_REGISTRY=docker-registry.default.svc:5000 {% endif %} {% if openshift.common.is_containerized | bool %} diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index af3ebc6d2..7964bbb48 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -164,16 +164,16 @@ masterClients: externalKubernetesClientConnectionOverrides: acceptContentTypes: application/vnd.kubernetes.protobuf,application/json contentType: application/vnd.kubernetes.protobuf - burst: 400 - qps: 200 + burst: {{ openshift_master_external_ratelimit_burst | default(400) }} + qps: {{ openshift_master_external_ratelimit_qps | default(200) }} {% endif %} externalKubernetesKubeConfig: "" {% if openshift.common.version_gte_3_3_or_1_3 | bool %} openshiftLoopbackClientConnectionOverrides: acceptContentTypes: application/vnd.kubernetes.protobuf,application/json contentType: application/vnd.kubernetes.protobuf - burst: 600 - qps: 300 + burst: {{ openshift_master_loopback_ratelimit_burst | default(600) }} + qps: {{ openshift_master_loopback_ratelimit_qps | default(300) }} {% endif %} openshiftLoopbackKubeConfig: openshift-master.kubeconfig masterPublicURL: {{ openshift.master.public_api_url }} diff --git a/roles/openshift_master/templates/native-cluster/atomic-openshift-master-api.j2 b/roles/openshift_master/templates/native-cluster/atomic-openshift-master-api.j2 index c05a27559..63eb3ea1b 100644 --- a/roles/openshift_master/templates/native-cluster/atomic-openshift-master-api.j2 +++ b/roles/openshift_master/templates/native-cluster/atomic-openshift-master-api.j2 @@ -1,6 +1,9 @@ OPTIONS=--loglevel={{ openshift.master.debug_level }} --listen={{ 'https' if openshift.master.api_use_ssl else 'http' }}://{{ openshift.master.bind_addr }}:{{ openshift.master.api_port }} --master={{ openshift.master.loopback_api_url }} CONFIG_FILE={{ openshift_master_config_file }} -{% if openshift_push_via_dns | default(false) %} +{# Preserve existing OPENSHIFT_DEFAULT_REGISTRY settings in scale up runs #} +{% if openshift_master_is_scaleup_host %} +{{ openshift_master_default_registry_value_api }} +{% elif openshift_push_via_dns | default(false) %} OPENSHIFT_DEFAULT_REGISTRY=docker-registry.default.svc:5000 {% endif %} {% if openshift.common.is_containerized | bool %} diff --git a/roles/openshift_master/templates/native-cluster/atomic-openshift-master-controllers.j2 b/roles/openshift_master/templates/native-cluster/atomic-openshift-master-controllers.j2 index a153fb33d..0adfd05b6 100644 --- a/roles/openshift_master/templates/native-cluster/atomic-openshift-master-controllers.j2 +++ b/roles/openshift_master/templates/native-cluster/atomic-openshift-master-controllers.j2 @@ -1,13 +1,16 @@ OPTIONS=--loglevel={{ openshift.master.debug_level }} --listen={{ 'https' if openshift.master.api_use_ssl else 'http' }}://{{ openshift.master.bind_addr }}:{{ openshift.master.controllers_port }} CONFIG_FILE={{ openshift_master_config_file }} -{% if openshift_push_via_dns | default(false) %} +{# Preserve existing OPENSHIFT_DEFAULT_REGISTRY settings in scale up runs #} +{% if openshift_master_is_scaleup_host %} +{{ openshift_master_default_registry_value_controllers }} +{% elif openshift_push_via_dns | default(false) %} OPENSHIFT_DEFAULT_REGISTRY=docker-registry.default.svc:5000 {% endif %} {% if openshift.common.is_containerized | bool %} IMAGE_VERSION={{ openshift_image_tag }} {% endif %} -{% if openshift_cloudprovider_kind | default('') == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_access_key is defined %} +{% if openshift_cloudprovider_kind | default('') == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_secret_key is defined %} AWS_ACCESS_KEY_ID={{ openshift_cloudprovider_aws_access_key }} AWS_SECRET_ACCESS_KEY={{ openshift_cloudprovider_aws_secret_key }} {% endif %} diff --git a/roles/openshift_master/vars/main.yml b/roles/openshift_master/vars/main.yml index c5ba20409..7745d014f 100644 --- a/roles/openshift_master/vars/main.yml +++ b/roles/openshift_master/vars/main.yml @@ -20,3 +20,4 @@ openshift_master_valid_grant_methods: - deny l_is_ha: "{{ openshift.master.ha is defined and openshift.master.ha | bool }}" +openshift_master_is_scaleup_host: False diff --git a/roles/openshift_metrics/vars/openshift-enterprise.yml b/roles/openshift_metrics/vars/openshift-enterprise.yml index b20957550..f0bdac7d2 100644 --- a/roles/openshift_metrics/vars/openshift-enterprise.yml +++ b/roles/openshift_metrics/vars/openshift-enterprise.yml @@ -1,3 +1,3 @@ --- __openshift_metrics_image_prefix: "{{ openshift_hosted_metrics_deployer_prefix | default('registry.access.redhat.com/openshift3/') }}" -__openshift_metrics_image_version: "{{ openshift_hosted_metrics_deployer_version | default ('3.6.0') }}" +__openshift_metrics_image_version: "{{ openshift_hosted_metrics_deployer_version | default ('v3.6') }}" diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml index 600d8f676..19eb3cdf7 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml @@ -15,7 +15,7 @@ oc_project: state: present name: "{{ glusterfs_namespace }}" - when: glusterfs_is_native or glusterfs_heketi_is_native + when: glusterfs_is_native or glusterfs_heketi_is_native or glusterfs_storageclass - name: Delete pre-existing heketi resources oc_obj: |