summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--filter_plugins/oo_filters.py15
-rw-r--r--playbooks/byo/openshift-cluster/openshift-prometheus.yml4
-rw-r--r--playbooks/common/openshift-cluster/openshift_hosted.yml3
-rw-r--r--playbooks/common/openshift-cluster/openshift_prometheus.yml9
-rw-r--r--roles/docker/tasks/systemcontainer_crio.yml9
-rw-r--r--roles/docker/templates/crio.conf.j25
-rw-r--r--roles/flannel_register/defaults/main.yaml5
-rw-r--r--roles/flannel_register/templates/flannel-config.json1
-rw-r--r--roles/openshift_cfme/defaults/main.yml3
-rw-r--r--roles/openshift_cfme/tasks/nfs.yml7
-rw-r--r--roles/openshift_health_checker/library/aos_version.py31
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py10
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_version.py1
-rw-r--r--roles/openshift_health_checker/test/package_version_test.py2
-rw-r--r--roles/openshift_hosted/tasks/registry/registry.yml8
-rw-r--r--roles/openshift_hosted/tasks/router/router.yml18
-rw-r--r--roles/openshift_logging_curator/templates/curator.j22
-rw-r--r--roles/openshift_logging_elasticsearch/templates/es.j22
-rw-r--r--roles/openshift_logging_fluentd/templates/fluentd.j22
-rw-r--r--roles/openshift_logging_kibana/templates/kibana.j24
-rw-r--r--roles/openshift_logging_mux/templates/mux.j22
-rw-r--r--roles/openshift_master/tasks/systemd_units.yml2
-rw-r--r--roles/openshift_node/templates/node.yaml.v1.j22
-rw-r--r--roles/openshift_prometheus/README.md95
-rw-r--r--roles/openshift_prometheus/defaults/main.yaml74
-rw-r--r--roles/openshift_prometheus/files/openshift_prometheus.exports3
-rw-r--r--roles/openshift_prometheus/meta/main.yaml19
-rw-r--r--roles/openshift_prometheus/tasks/create_pvs.yaml36
-rw-r--r--roles/openshift_prometheus/tasks/install_prometheus.yaml241
-rw-r--r--roles/openshift_prometheus/tasks/main.yaml26
-rw-r--r--roles/openshift_prometheus/tasks/nfs.yaml44
-rw-r--r--roles/openshift_prometheus/templates/alertmanager.yml.j220
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-server.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prometheus.rules.j24
-rw-r--r--roles/openshift_prometheus/templates/prometheus.yml.j2174
-rw-r--r--roles/openshift_prometheus/templates/prometheus_deployment.j2240
-rw-r--r--roles/openshift_prometheus/tests/inventory2
-rw-r--r--roles/openshift_prometheus/tests/test.yaml5
40 files changed, 1138 insertions, 37 deletions
diff --git a/filter_plugins/oo_filters.py b/filter_plugins/oo_filters.py
index 36a90a870..277695f78 100644
--- a/filter_plugins/oo_filters.py
+++ b/filter_plugins/oo_filters.py
@@ -1024,6 +1024,18 @@ def oo_contains_rule(source, apiGroups, resources, verbs):
return False
+def oo_selector_to_string_list(user_dict):
+ """Convert a dict of selectors to a key=value list of strings
+
+Given input of {'region': 'infra', 'zone': 'primary'} returns a list
+of items as ['region=infra', 'zone=primary']
+ """
+ selectors = []
+ for key in user_dict:
+ selectors.append("{}={}".format(key, user_dict[key]))
+ return selectors
+
+
class FilterModule(object):
""" Custom ansible filter mapping """
@@ -1065,5 +1077,6 @@ class FilterModule(object):
"oo_openshift_loadbalancer_backends": oo_openshift_loadbalancer_backends,
"to_padded_yaml": to_padded_yaml,
"oo_random_word": oo_random_word,
- "oo_contains_rule": oo_contains_rule
+ "oo_contains_rule": oo_contains_rule,
+ "oo_selector_to_string_list": oo_selector_to_string_list
}
diff --git a/playbooks/byo/openshift-cluster/openshift-prometheus.yml b/playbooks/byo/openshift-cluster/openshift-prometheus.yml
new file mode 100644
index 000000000..15917078d
--- /dev/null
+++ b/playbooks/byo/openshift-cluster/openshift-prometheus.yml
@@ -0,0 +1,4 @@
+---
+- include: initialize_groups.yml
+
+- include: ../../common/openshift-cluster/openshift_prometheus.yml
diff --git a/playbooks/common/openshift-cluster/openshift_hosted.yml b/playbooks/common/openshift-cluster/openshift_hosted.yml
index 99a634970..a391b963a 100644
--- a/playbooks/common/openshift-cluster/openshift_hosted.yml
+++ b/playbooks/common/openshift-cluster/openshift_hosted.yml
@@ -49,6 +49,9 @@
- role: cockpit-ui
when: ( openshift.common.version_gte_3_3_or_1_3 | bool ) and ( openshift_hosted_manage_registry | default(true) | bool ) and not (openshift.docker.hosted_registry_insecure | default(false) | bool)
+ - role: openshift_prometheus
+ when: openshift_hosted_prometheus_deploy | default(false) | bool
+
- name: Update master-config for publicLoggingURL
hosts: oo_masters_to_config:!oo_first_master
tags:
diff --git a/playbooks/common/openshift-cluster/openshift_prometheus.yml b/playbooks/common/openshift-cluster/openshift_prometheus.yml
new file mode 100644
index 000000000..a979c0c00
--- /dev/null
+++ b/playbooks/common/openshift-cluster/openshift_prometheus.yml
@@ -0,0 +1,9 @@
+---
+- include: std_include.yml
+
+- name: OpenShift Prometheus
+ hosts: oo_first_master
+ roles:
+ - openshift_prometheus
+ vars:
+ openshift_prometheus_state: present
diff --git a/roles/docker/tasks/systemcontainer_crio.yml b/roles/docker/tasks/systemcontainer_crio.yml
index 8f81273e5..24ca0d9f8 100644
--- a/roles/docker/tasks/systemcontainer_crio.yml
+++ b/roles/docker/tasks/systemcontainer_crio.yml
@@ -3,6 +3,15 @@
- set_fact:
l_insecure_crio_registries: "{{ '\"{}\"'.format('\", \"'.join(openshift.docker.insecure_registries)) }}"
when: openshift.docker.insecure_registries
+- set_fact:
+ l_crio_registries: "{{ openshift.docker.additional_registries + ['docker.io'] }}"
+ when: openshift.docker.additional_registries
+- set_fact:
+ l_crio_registries: "{{ ['docker.io'] }}"
+ when: not openshift.docker.additional_registries
+- set_fact:
+ l_additional_crio_registries: "{{ '\"{}\"'.format('\", \"'.join(l_crio_registries)) }}"
+ when: openshift.docker.additional_registries
- name: Ensure container-selinux is installed
package:
diff --git a/roles/docker/templates/crio.conf.j2 b/roles/docker/templates/crio.conf.j2
index 5b31932b1..b4ee84fd0 100644
--- a/roles/docker/templates/crio.conf.j2
+++ b/roles/docker/templates/crio.conf.j2
@@ -120,6 +120,11 @@ insecure_registries = [
{{ l_insecure_crio_registries|default("") }}
]
+# registries is used to specify a comma separated list of registries to be used
+# when pulling an unqualified image (e.g. fedora:rawhide).
+registries = [
+{{ l_additional_crio_registries|default("") }}
+]
# The "crio.network" table contains settings pertaining to the
# management of CNI plugins.
[crio.network]
diff --git a/roles/flannel_register/defaults/main.yaml b/roles/flannel_register/defaults/main.yaml
index ddf8230ec..71c8f38c3 100644
--- a/roles/flannel_register/defaults/main.yaml
+++ b/roles/flannel_register/defaults/main.yaml
@@ -1,7 +1,6 @@
---
-flannel_network: "{{ openshift.common.portal_net | default('172.30.0.0/16', true) }}"
-flannel_min_network: 172.30.5.0
-flannel_subnet_len: 24
+flannel_network: "{{ openshift.master.sdn_cluster_network_cidr }}"
+flannel_subnet_len: "{{ 32 - openshift.master.sdn_host_subnet_length }}"
flannel_etcd_key: /openshift.com/network
etcd_hosts: "{{ etcd_urls }}"
etcd_conf_dir: "{{ openshift.common.config_base }}/master"
diff --git a/roles/flannel_register/templates/flannel-config.json b/roles/flannel_register/templates/flannel-config.json
index 89ce4c30b..bba3729fa 100644
--- a/roles/flannel_register/templates/flannel-config.json
+++ b/roles/flannel_register/templates/flannel-config.json
@@ -1,7 +1,6 @@
{
"Network": "{{ flannel_network }}",
"SubnetLen": {{ flannel_subnet_len }},
- "SubnetMin": "{{ flannel_min_network }}",
"Backend": {
"Type": "host-gw"
}
diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml
index 8aa57e75a..b82c2e602 100644
--- a/roles/openshift_cfme/defaults/main.yml
+++ b/roles/openshift_cfme/defaults/main.yml
@@ -27,9 +27,6 @@ openshift_cfme_pv_data:
# Tuning parameter to use more than 5 images at once from an ImageStream
openshift_cfme_maxImagesBulkImportedPerRepository: 100
-# Hostname/IP of the NFS server. Currently defaults to first master
-openshift_cfme_nfs_server: "{{ groups.nfs.0 }}"
-openshift_cfme_nfs_directory: "/exports"
# TODO: Refactor '_install_app' variable. This is just for testing but
# maybe in the future it should control the entire yes/no for CFME.
#
diff --git a/roles/openshift_cfme/tasks/nfs.yml b/roles/openshift_cfme/tasks/nfs.yml
index 8db45492e..ca04628a8 100644
--- a/roles/openshift_cfme/tasks/nfs.yml
+++ b/roles/openshift_cfme/tasks/nfs.yml
@@ -1,6 +1,13 @@
---
# Tasks to statically provision NFS volumes
# Include if not using dynamic volume provisioning
+
+- name: Set openshift_cfme_nfs_server fact
+ when: openshift_cfme_nfs_server is not defined
+ set_fact:
+ # Hostname/IP of the NFS server. Currently defaults to first master
+ openshift_cfme_nfs_server: "{{ oo_nfs_to_config.0 }}"
+
- name: Ensure the /exports/ directory exists
file:
path: /exports/
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index c8769b511..db3c0b654 100644
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -26,15 +26,13 @@ from ansible.module_utils.six import string_types
YUM_IMPORT_EXCEPTION = None
DNF_IMPORT_EXCEPTION = None
-PKG_MGR = None
try:
import yum # pylint: disable=import-error
- PKG_MGR = "yum"
except ImportError as err:
YUM_IMPORT_EXCEPTION = err
+
try:
import dnf # pylint: disable=import-error
- PKG_MGR = "dnf"
except ImportError as err:
DNF_IMPORT_EXCEPTION = err
@@ -51,14 +49,19 @@ def main():
module = AnsibleModule(
argument_spec=dict(
package_list=dict(type="list", required=True),
+ package_mgr=dict(type="str", required=True),
),
supports_check_mode=True
)
- if YUM_IMPORT_EXCEPTION and DNF_IMPORT_EXCEPTION:
+ # determine the package manager to use
+ package_mgr = module.params['package_mgr']
+ if package_mgr not in ('yum', 'dnf'):
+ module.fail_json(msg="package_mgr must be one of: yum, dnf")
+ pkg_mgr_exception = dict(yum=YUM_IMPORT_EXCEPTION, dnf=DNF_IMPORT_EXCEPTION)[package_mgr]
+ if pkg_mgr_exception:
module.fail_json(
- msg="aos_version module could not import yum or dnf: %s %s" %
- (YUM_IMPORT_EXCEPTION, DNF_IMPORT_EXCEPTION)
+ msg="aos_version module could not import {}: {}".format(package_mgr, pkg_mgr_exception)
)
# determine the packages we will look for
@@ -78,7 +81,7 @@ def main():
# get the list of packages available and complain if anything is wrong
try:
- pkgs = _retrieve_available_packages(expected_pkg_names)
+ pkgs = _retrieve_available_packages(package_mgr, expected_pkg_names)
if versioned_pkgs:
_check_precise_version_found(pkgs, _to_dict(versioned_pkgs))
_check_higher_version_found(pkgs, _to_dict(versioned_pkgs))
@@ -93,7 +96,7 @@ def _to_dict(pkg_list):
return {pkg["name"]: pkg for pkg in pkg_list}
-def _retrieve_available_packages(expected_pkgs):
+def _retrieve_available_packages(pkg_mgr, expected_pkgs):
# The openshift excluder prevents unintended updates to openshift
# packages by setting yum excludes on those packages. See:
# https://wiki.centos.org/SpecialInterestGroup/PaaS/OpenShift-Origin-Control-Updates
@@ -103,14 +106,15 @@ def _retrieve_available_packages(expected_pkgs):
# be excluded. So, for our purposes here, disable excludes to see
# what will really be available during an install or upgrade.
- if PKG_MGR == "yum":
+ if pkg_mgr == "yum":
# search for package versions available for openshift pkgs
yb = yum.YumBase() # pylint: disable=invalid-name
yb.conf.disable_excludes = ['all']
try:
- pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs)
+ pkgs = yb.rpmdb.returnPackages(patterns=expected_pkgs)
+ pkgs += yb.pkgSack.returnPackages(patterns=expected_pkgs)
except yum.Errors.PackageSackError as excinfo:
# you only hit this if *none* of the packages are available
raise AosVersionException('\n'.join([
@@ -118,7 +122,7 @@ def _retrieve_available_packages(expected_pkgs):
'Check your subscription and repo settings.',
str(excinfo),
]))
- elif PKG_MGR == "dnf":
+ elif pkg_mgr == "dnf":
dbase = dnf.Base() # pyling: disable=invalid-name
dbase.conf.disable_excludes = ['all']
@@ -127,8 +131,11 @@ def _retrieve_available_packages(expected_pkgs):
dquery = dbase.sack.query()
aquery = dquery.available()
+ iquery = dquery.installed()
- pkgs = list(aquery.filter(name=expected_pkgs))
+ available_pkgs = list(aquery.filter(name=expected_pkgs))
+ installed_pkgs = list(iquery.filter(name=expected_pkgs))
+ pkgs = available_pkgs + installed_pkgs
if not pkgs:
# pkgs list is empty, raise because no expected packages found
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 857a80c74..866c74d7c 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -32,6 +32,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# we use python-docker-py to check local docker for images, and skopeo
# to look for images available remotely without waiting to pull them.
dependencies = ["python-docker-py", "skopeo"]
+ skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false"
def is_active(self):
"""Skip hosts with unsupported deployment types."""
@@ -67,8 +68,10 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"failed": True,
"msg": (
"One or more required Docker images are not available:\n {}\n"
- "Configured registries: {}"
- ).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries)),
+ "Configured registries: {}\n"
+ "Checked by: {}"
+ ).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries),
+ self.skopeo_img_check_command),
}
return {}
@@ -169,8 +172,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
for registry in registries:
args = {
- "_raw_params": "timeout 10 skopeo inspect --tls-verify=false "
- "docker://{}/{}".format(registry, image)
+ "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)
}
result = self.execute_module("command", args)
if result.get("rc", 0) == 0 and not result.get("failed"):
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 8b780114f..0b795b6c4 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -46,6 +46,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
check_multi_minor_release = deployment_type in ['openshift-enterprise']
args = {
+ "package_mgr": self.get_var("ansible_pkg_mgr"),
"package_list": [
{
"name": "openvswitch",
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 6054d3f3e..e871f39f0 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -5,6 +5,7 @@ from openshift_checks.package_version import PackageVersion, OpenShiftCheckExcep
def task_vars_for(openshift_release, deployment_type):
return dict(
+ ansible_pkg_mgr='yum',
openshift=dict(common=dict(service_type=deployment_type)),
openshift_release=openshift_release,
openshift_image_tag='v' + openshift_release,
@@ -27,6 +28,7 @@ def test_openshift_version_not_supported():
def test_invalid_openshift_release_format():
task_vars = dict(
+ ansible_pkg_mgr='yum',
openshift=dict(common=dict(service_type='origin')),
openshift_image_tag='v0',
openshift_deployment_type='origin',
diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml
index 3e424da12..d73c290ff 100644
--- a/roles/openshift_hosted/tasks/registry/registry.yml
+++ b/roles/openshift_hosted/tasks/registry/registry.yml
@@ -61,6 +61,14 @@
openshift_hosted_registry_env_vars: "{{ openshift_hosted_registry_env_vars | combine({'OPENSHIFT_DEFAULT_REGISTRY':'docker-registry.default.svc:5000'}) }}"
when: openshift_push_via_dns | default(false) | bool
+- name: Update registry proxy settings for dc/docker-registry
+ set_fact:
+ openshift_hosted_registry_env_vars: "{{ {'HTTPS_PROXY': (openshift.common.https_proxy | default('')),
+ 'HTTP_PROXY': (openshift.common.http_proxy | default('')),
+ 'NO_PROXY': (openshift.common.no_proxy | default(''))}
+ | combine(openshift_hosted_registry_env_vars) }}"
+ when: (openshift.common.https_proxy | default(False)) or (openshift.common.http_proxy | default('')) != ''
+
- name: Create the registry service account
oc_serviceaccount:
name: "{{ openshift_hosted_registry_serviceaccount }}"
diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml
index e57ed733e..68ec7233e 100644
--- a/roles/openshift_hosted/tasks/router/router.yml
+++ b/roles/openshift_hosted/tasks/router/router.yml
@@ -18,6 +18,15 @@
openshift_hosted_router_selector: "{{ openshift.hosted.router.selector | default(None) }}"
openshift_hosted_router_image: "{{ openshift.hosted.router.registryurl }}"
+- name: Get the certificate contents for router
+ copy:
+ backup: True
+ dest: "/etc/origin/master/{{ item | basename }}"
+ src: "{{ item }}"
+ with_items: "{{ openshift_hosted_routers | oo_collect(attribute='certificate') |
+ oo_select_keys_from_list(['keyfile', 'certfile', 'cafile']) }}"
+ when: ( not openshift_hosted_router_create_certificate | bool ) or openshift_hosted_router_certificate != {}
+
# This is for when we desire a cluster signed cert
# The certificate is generated and placed in master_config_dir/
- block:
@@ -43,15 +52,6 @@
# End Block
when: ( openshift_hosted_router_create_certificate | bool ) and openshift_hosted_router_certificate == {}
-- name: Get the certificate contents for router
- copy:
- backup: True
- dest: "/etc/origin/master/{{ item | basename }}"
- src: "{{ item }}"
- with_items: "{{ openshift_hosted_routers | oo_collect(attribute='certificate') |
- oo_select_keys_from_list(['keyfile', 'certfile', 'cafile']) }}"
- when: not openshift_hosted_router_create_certificate | bool
-
- name: Create the router service account(s)
oc_serviceaccount:
name: "{{ item.serviceaccount }}"
diff --git a/roles/openshift_logging_curator/templates/curator.j2 b/roles/openshift_logging_curator/templates/curator.j2
index 6431f86d9..e74918a40 100644
--- a/roles/openshift_logging_curator/templates/curator.j2
+++ b/roles/openshift_logging_curator/templates/curator.j2
@@ -44,6 +44,8 @@ spec:
cpu: "{{curator_cpu_limit}}"
{% if curator_memory_limit is defined and curator_memory_limit is not none and curator_memory_limit != "" %}
memory: "{{curator_memory_limit}}"
+ requests:
+ memory: "{{curator_memory_limit}}"
{% endif %}
env:
-
diff --git a/roles/openshift_logging_elasticsearch/templates/es.j2 b/roles/openshift_logging_elasticsearch/templates/es.j2
index cbe6b89f2..5f2932541 100644
--- a/roles/openshift_logging_elasticsearch/templates/es.j2
+++ b/roles/openshift_logging_elasticsearch/templates/es.j2
@@ -48,7 +48,7 @@ spec:
cpu: "{{es_cpu_limit}}"
{% endif %}
requests:
- memory: "512Mi"
+ memory: "{{es_memory_limit}}"
ports:
-
containerPort: 9200
diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2
index 88e039e3f..a4afb6618 100644
--- a/roles/openshift_logging_fluentd/templates/fluentd.j2
+++ b/roles/openshift_logging_fluentd/templates/fluentd.j2
@@ -36,6 +36,8 @@ spec:
limits:
cpu: {{ openshift_logging_fluentd_cpu_limit }}
memory: {{ openshift_logging_fluentd_memory_limit }}
+ requests:
+ memory: {{ openshift_logging_fluentd_memory_limit }}
volumeMounts:
- name: runlogjournal
mountPath: /run/log/journal
diff --git a/roles/openshift_logging_kibana/templates/kibana.j2 b/roles/openshift_logging_kibana/templates/kibana.j2
index 512d99d06..da1386d3e 100644
--- a/roles/openshift_logging_kibana/templates/kibana.j2
+++ b/roles/openshift_logging_kibana/templates/kibana.j2
@@ -46,6 +46,8 @@ spec:
{% endif %}
{% if kibana_memory_limit is not none and kibana_memory_limit != "" %}
memory: "{{ kibana_memory_limit }}"
+ requests:
+ memory: "{{ kibana_memory_limit }}"
{% endif %}
{% endif %}
env:
@@ -82,6 +84,8 @@ spec:
{% endif %}
{% if kibana_proxy_memory_limit is not none and kibana_proxy_memory_limit != "" %}
memory: "{{ kibana_proxy_memory_limit }}"
+ requests:
+ memory: "{{ kibana_proxy_memory_limit }}"
{% endif %}
{% endif %}
ports:
diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2
index 70afe5cee..ff18d3270 100644
--- a/roles/openshift_logging_mux/templates/mux.j2
+++ b/roles/openshift_logging_mux/templates/mux.j2
@@ -45,6 +45,8 @@ spec:
{% endif %}
{% if mux_memory_limit is not none %}
memory: "{{mux_memory_limit}}"
+ requests:
+ memory: "{{mux_memory_limit}}"
{% endif %}
{% endif %}
ports:
diff --git a/roles/openshift_master/tasks/systemd_units.yml b/roles/openshift_master/tasks/systemd_units.yml
index 782a35abe..c480d8223 100644
--- a/roles/openshift_master/tasks/systemd_units.yml
+++ b/roles/openshift_master/tasks/systemd_units.yml
@@ -7,7 +7,7 @@
# openshift_master_config_dir is set.
- name: Set openshift_master_config_dir if unset
set_fact:
- openshift_master_config_dir: '/var/lib/origin'
+ openshift_master_config_dir: '/etc/origin/master'
when: openshift_master_config_dir is not defined
- name: Remove the legacy master service if it exists
diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2
index 711afcadb..f59aa6fb4 100644
--- a/roles/openshift_node/templates/node.yaml.v1.j2
+++ b/roles/openshift_node/templates/node.yaml.v1.j2
@@ -21,8 +21,6 @@ kubeletArguments: {{ openshift.node.kubelet_args | default(None) | to_padded_yam
- remote
container-runtime-endpoint:
- /var/run/crio.sock
- experimental-cri:
- - 'true'
image-service-endpoint:
- /var/run/crio.sock
node-labels:
diff --git a/roles/openshift_prometheus/README.md b/roles/openshift_prometheus/README.md
new file mode 100644
index 000000000..c5a44bffb
--- /dev/null
+++ b/roles/openshift_prometheus/README.md
@@ -0,0 +1,95 @@
+OpenShift Prometheus
+====================
+
+OpenShift Prometheus Installation
+
+Requirements
+------------
+
+
+Role Variables
+--------------
+
+For default values, see [`defaults/main.yaml`](defaults/main.yaml).
+
+- `openshift_prometheus_state`: present - install/update. absent - uninstall.
+
+- `openshift_prometheus_namespace`: project (i.e. namespace) where the components will be
+ deployed.
+
+- `openshift_prometheus_replicas`: The number of replicas for prometheus deployment.
+
+- `openshift_prometheus_node_selector`: Selector for the nodes prometheus will be deployed on.
+
+- `openshift_prometheus_image_<COMPONENT>`: specify image for the component
+
+## Storage related variables
+Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can set pv claim by setting corresponding role variable:
+```
+openshift_prometheus_<COMPONENT>_storage_type: <VALUE>
+openshift_prometheus_<COMPONENT>_pvc_(name|size|access_modes|pv_selector): <VALUE>
+```
+e.g
+```
+openshift_prometheus_storage_type: pvc
+openshift_prometheus_alertmanager_pvc_name: alertmanager
+openshift_prometheus_alertbuffer_pvc_size: 10G
+openshift_prometheus_pvc_access_modes: [ReadWriteOnce]
+```
+
+## Additional Alert Rules file variable
+An external file with alert rules can be added by setting path to additional rules variable:
+```
+openshift_prometheus_additional_rules_file: <PATH>
+```
+
+File content should be in prometheus alert rules format.
+Following example sets rule to fire an alert when one of the cluster nodes is down:
+
+```
+groups:
+- name: example-rules
+ interval: 30s # defaults to global interval
+ rules:
+ - alert: Node Down
+ expr: up{job="kubernetes-nodes"} == 0
+ annotations:
+ miqTarget: "ContainerNode"
+ severity: "HIGH"
+ message: "{{ '{{' }}{{ '$labels.instance' }}{{ '}}' }} is down"
+```
+
+
+## Additional variables to control resource limits
+Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can specify a cpu and memory limits and requests by setting
+the corresponding role variable:
+```
+openshift_prometheus_<COMPONENT>_(limits|requests)_(memory|cpu): <VALUE>
+```
+e.g
+```
+openshift_prometheus_alertmanager_limits_memory: 1Gi
+openshift_prometheus_oath_proxy_requests_cpu: 100
+```
+
+Dependencies
+------------
+
+openshift_facts
+
+
+Example Playbook
+----------------
+
+```
+- name: Configure openshift-prometheus
+ hosts: oo_first_master
+ roles:
+ - role: openshift_prometheus
+```
+
+License
+-------
+
+Apache License, Version 2.0
+
diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml
new file mode 100644
index 000000000..18d6a1645
--- /dev/null
+++ b/roles/openshift_prometheus/defaults/main.yaml
@@ -0,0 +1,74 @@
+---
+# defaults file for openshift_prometheus
+openshift_prometheus_state: present
+
+openshift_prometheus_namespace: prometheus
+
+openshift_prometheus_replicas: 1
+openshift_prometheus_node_selector: {"region":"infra"}
+
+# images
+openshift_prometheus_image_proxy: "openshift/oauth-proxy:v1.0.0"
+openshift_prometheus_image_prometheus: "openshift/prometheus:v2.0.0-dev"
+openshift_prometheus_image_alertmanager: "openshift/prometheus-alertmanager:dev"
+openshift_prometheus_image_alertbuffer: "ilackarms/message-buffer"
+
+# additional prometheus rules file
+openshift_prometheus_additional_rules_file: null
+
+# All the required exports
+openshift_prometheus_pv_exports:
+ - prometheus
+ - prometheus-alertmanager
+ - prometheus-alertbuffer
+# PV template files and their created object names
+openshift_prometheus_pv_data:
+ - pv_name: prometheus
+ pv_template: prom-pv-server.yml
+ pv_label: Prometheus Server PV
+ - pv_name: prometheus-alertmanager
+ pv_template: prom-pv-alertmanager.yml
+ pv_label: Prometheus Alertmanager PV
+ - pv_name: prometheus-alertbuffer
+ pv_template: prom-pv-alertbuffer.yml
+ pv_label: Prometheus Alert Buffer PV
+
+# Hostname/IP of the NFS server. Currently defaults to first master
+openshift_prometheus_nfs_server: "{{ groups.nfs.0 }}"
+
+# storage
+openshift_prometheus_storage_type: pvc
+openshift_prometheus_pvc_name: prometheus
+openshift_prometheus_pvc_size: 10G
+openshift_prometheus_pvc_access_modes: [ReadWriteOnce]
+openshift_prometheus_pvc_pv_selector: {}
+
+openshift_prometheus_alertmanager_storage_type: pvc
+openshift_prometheus_alertmanager_pvc_name: prometheus-alertmanager
+openshift_prometheus_alertmanager_pvc_size: 10G
+openshift_prometheus_alertmanager_pvc_access_modes: [ReadWriteOnce]
+openshift_prometheus_alertmanager_pvc_pv_selector: {}
+
+openshift_prometheus_alertbuffer_storage_type: pvc
+openshift_prometheus_alertbuffer_pvc_name: prometheus-alertbuffer
+openshift_prometheus_alertbuffer_pvc_size: 10G
+openshift_prometheus_alertbuffer_pvc_access_modes: [ReadWriteOnce]
+openshift_prometheus_alertbuffer_pvc_pv_selector: {}
+
+# container resources
+openshift_prometheus_cpu_limit: null
+openshift_prometheus_memory_limit: null
+openshift_prometheus_cpu_requests: null
+openshift_prometheus_memory_requests: null
+openshift_prometheus_alertmanager_cpu_limit: null
+openshift_prometheus_alertmanager_memory_limit: null
+openshift_prometheus_alertmanager_cpu_requests: null
+openshift_prometheus_alertmanager_memory_requests: null
+openshift_prometheus_alertbuffer_cpu_limit: null
+openshift_prometheus_alertbuffer_memory_limit: null
+openshift_prometheus_alertbuffer_cpu_requests: null
+openshift_prometheus_alertbuffer_memory_requests: null
+openshift_prometheus_oauth_proxy_cpu_limit: null
+openshift_prometheus_oauth_proxy_memory_limit: null
+openshift_prometheus_oauth_proxy_cpu_requests: null
+openshift_prometheus_oauth_proxy_memory_requests: null
diff --git a/roles/openshift_prometheus/files/openshift_prometheus.exports b/roles/openshift_prometheus/files/openshift_prometheus.exports
new file mode 100644
index 000000000..3ccedb1fd
--- /dev/null
+++ b/roles/openshift_prometheus/files/openshift_prometheus.exports
@@ -0,0 +1,3 @@
+/exports/prometheus *(rw,no_root_squash,no_wdelay)
+/exports/prometheus-alertmanager *(rw,no_root_squash,no_wdelay)
+/exports/prometheus-alertbuffer *(rw,no_root_squash,no_wdelay)
diff --git a/roles/openshift_prometheus/meta/main.yaml b/roles/openshift_prometheus/meta/main.yaml
new file mode 100644
index 000000000..33188bb7e
--- /dev/null
+++ b/roles/openshift_prometheus/meta/main.yaml
@@ -0,0 +1,19 @@
+---
+galaxy_info:
+ author: OpenShift Development <dev@lists.openshift.redhat.com>
+ description: Deploy OpenShift prometheus integration for the cluster
+ company: Red Hat, Inc.
+ license: license (Apache)
+ min_ansible_version: 2.2
+ platforms:
+ - name: EL
+ versions:
+ - 7
+ - name: Fedora
+ versions:
+ - all
+ categories:
+ - openshift
+dependencies:
+- { role: lib_openshift }
+- { role: openshift_facts }
diff --git a/roles/openshift_prometheus/tasks/create_pvs.yaml b/roles/openshift_prometheus/tasks/create_pvs.yaml
new file mode 100644
index 000000000..4e79da05f
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/create_pvs.yaml
@@ -0,0 +1,36 @@
+---
+# Check for existance and then conditionally:
+# - evaluate templates
+# - PVs
+#
+# These tasks idempotently create required Prometheus PV objects. Do not
+# call this file directly. This file is intended to be ran as an
+# include that has a 'with_items' attached to it. Hence the use below
+# of variables like "{{ item.pv_label }}"
+
+- name: "Check if the {{ item.pv_label }} template has been created already"
+ oc_obj:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ state: list
+ kind: pv
+ name: "{{ item.pv_name }}"
+ register: prom_pv_check
+
+# Skip all of this if the PV already exists
+- block:
+ - name: "Ensure the {{ item.pv_label }} template is evaluated"
+ template:
+ src: "{{ item.pv_template }}.j2"
+ dest: "{{ tempdir }}/templates/{{ item.pv_template }}"
+
+ - name: "Ensure {{ item.pv_label }} is created"
+ oc_obj:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ kind: pv
+ name: "{{ item.pv_name }}"
+ state: present
+ delete_after: True
+ files:
+ - "{{ tempdir }}/templates/{{ item.pv_template }}"
+ when:
+ - not prom_pv_check.results.results.0
diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml
new file mode 100644
index 000000000..93bdda3e8
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml
@@ -0,0 +1,241 @@
+---
+
+# namespace
+- name: Add prometheus project
+ oc_project:
+ state: "{{ state }}"
+ name: "{{ openshift_prometheus_namespace }}"
+ node_selector: "{{ openshift_prometheus_node_selector | oo_selector_to_string_list() }}"
+ description: Prometheus
+
+# secrets
+- name: Set alert and prometheus secrets
+ oc_secret:
+ state: "{{ state }}"
+ name: "{{ item }}-proxy"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ contents:
+ - path: session_secret
+ data: "{{ 43 | oo_random_word }}="
+ with_items:
+ - prometheus
+ - alerts
+
+# serviceaccount
+- name: create prometheus serviceaccount
+ oc_serviceaccount:
+ state: "{{ state }}"
+ name: prometheus
+ namespace: "{{ openshift_prometheus_namespace }}"
+ # TODO add annotations when supproted
+ # annotations:
+ # serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+ # serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+
+ secrets:
+ - prometheus-secrets
+ changed_when: no
+
+# TODO remove this when annotations are supported by oc_serviceaccount
+- name: annotate serviceaccount
+ command: >
+ {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+ serviceaccount prometheus
+ serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+ serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+
+
+# create clusterrolebinding for prometheus serviceaccount
+- name: Set cluster-reader permissions for prometheus
+ oc_adm_policy_user:
+ state: "{{ state }}"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ resource_kind: cluster-role
+ resource_name: cluster-reader
+ user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus"
+
+
+######################################################################
+# NFS
+# In the case that we are not running on a cloud provider, volumes must be statically provisioned
+
+- include: nfs.yaml
+ when: not (openshift_cloudprovider_kind is defined and (openshift_cloudprovider_kind == 'aws' or openshift_cloudprovider_kind == 'gce'))
+
+
+# create prometheus and alerts services
+# TODO join into 1 task with loop
+- name: Create prometheus service
+ oc_service:
+ state: "{{ state }}"
+ name: "{{ item.name }}"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ selector:
+ app: prometheus
+ labels:
+ name: "{{ item.name }}"
+ # TODO add annotations when supported
+ # annotations:
+ # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+ ports:
+ - port: 443
+ targetPort: 8443
+ with_items:
+ - name: prometheus
+
+- name: Create alerts service
+ oc_service:
+ state: "{{ state }}"
+ name: "{{ item.name }}"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ selector:
+ app: prometheus
+ labels:
+ name: "{{ item.name }}"
+ # TODO add annotations when supported
+ # annotations:
+ # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+ ports:
+ - port: 443
+ targetPort: 9443
+ with_items:
+ - name: alerts
+
+
+# Annotate services with secret name
+# TODO remove this when annotations are supported by oc_service
+- name: annotate prometheus service
+ command: >
+ {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+ service prometheus 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls'
+
+- name: annotate alerts service
+ command: >
+ {{ openshift.common.client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
+ service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls'
+
+# create prometheus and alerts routes
+- name: create prometheus and alerts routes
+ oc_route:
+ state: "{{ state }}"
+ name: "{{ item.name }}"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ service_name: "{{ item.name }}"
+ tls_termination: reencrypt
+ with_items:
+ - name: prometheus
+ - name: alerts
+
+# Storage
+- name: create prometheus pvc
+ oc_pvc:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ name: "{{ openshift_prometheus_pvc_name }}"
+ access_modes: "{{ openshift_prometheus_pvc_access_modes }}"
+ volume_capacity: "{{ openshift_prometheus_pvc_size }}"
+ selector: "{{ openshift_prometheus_pvc_pv_selector }}"
+
+- name: create alertmanager pvc
+ oc_pvc:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ name: "{{ openshift_prometheus_alertmanager_pvc_name }}"
+ access_modes: "{{ openshift_prometheus_alertmanager_pvc_access_modes }}"
+ volume_capacity: "{{ openshift_prometheus_alertmanager_pvc_size }}"
+ selector: "{{ openshift_prometheus_alertmanager_pvc_pv_selector }}"
+
+- name: create alertbuffer pvc
+ oc_pvc:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ name: "{{ openshift_prometheus_alertbuffer_pvc_name }}"
+ access_modes: "{{ openshift_prometheus_alertbuffer_pvc_access_modes }}"
+ volume_capacity: "{{ openshift_prometheus_alertbuffer_pvc_size }}"
+ selector: "{{ openshift_prometheus_alertbuffer_pvc_pv_selector }}"
+
+# create prometheus deployment
+- name: Set prometheus deployment template
+ template:
+ src: prometheus_deployment.j2
+ dest: "{{ tempdir }}/templates/prometheus.yaml"
+ vars:
+ namespace: "{{ openshift_prometheus_namespace }}"
+ prom_replicas: "{{ openshift_prometheus_replicas }}"
+
+- name: Set prometheus deployment
+ oc_obj:
+ state: "{{ state }}"
+ name: "prometheus"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ kind: deployment
+ files:
+ - "{{ tempdir }}/templates/prometheus.yaml"
+ delete_after: true
+
+# prometheus configmap
+# Copy the additional rules file if it is defined
+- name: Copy additional rules file to host
+ copy:
+ src: "{{ openshift_prometheus_additional_rules_file }}"
+ dest: "{{ tempdir }}/prometheus.additional.rules"
+ when:
+ - openshift_prometheus_additional_rules_file is defined
+ - openshift_prometheus_additional_rules_file is not none
+ - openshift_prometheus_additional_rules_file | trim | length > 0
+
+- stat:
+ path: "{{ tempdir }}/prometheus.additional.rules"
+ register: additional_rules_stat
+
+# The kubernetes version impacts the prometheus scraping endpoint
+# so gathering it before constructing the configmap
+- name: get oc version
+ oc_version:
+ register: oc_version
+
+- set_fact:
+ kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
+
+- template:
+ src: prometheus.yml.j2
+ dest: "{{ tempdir }}/prometheus.yml"
+ changed_when: no
+
+- template:
+ src: prometheus.rules.j2
+ dest: "{{ tempdir }}/prometheus.rules"
+ changed_when: no
+
+# In prometheus configmap create "additional.rules" section if file exists
+- name: Set prometheus configmap
+ oc_configmap:
+ state: "{{ state }}"
+ name: "prometheus"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ from_file:
+ prometheus.rules: "{{ tempdir }}/prometheus.rules"
+ prometheus.additional.rules: "{{ tempdir }}/prometheus.additional.rules"
+ prometheus.yml: "{{ tempdir }}/prometheus.yml"
+ when: additional_rules_stat.stat.exists == True
+
+- name: Set prometheus configmap
+ oc_configmap:
+ state: "{{ state }}"
+ name: "prometheus"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ from_file:
+ prometheus.rules: "{{ tempdir }}/prometheus.rules"
+ prometheus.yml: "{{ tempdir }}/prometheus.yml"
+ when: additional_rules_stat.stat.exists == False
+
+# alertmanager configmap
+- template:
+ src: alertmanager.yml.j2
+ dest: "{{ tempdir }}/alertmanager.yml"
+ changed_when: no
+
+- name: Set alertmanager configmap
+ oc_configmap:
+ state: "{{ state }}"
+ name: "prometheus-alerts"
+ namespace: "{{ openshift_prometheus_namespace }}"
+ from_file:
+ alertmanager.yml: "{{ tempdir }}/alertmanager.yml"
diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml
new file mode 100644
index 000000000..523a64334
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/main.yaml
@@ -0,0 +1,26 @@
+---
+
+- name: Create temp directory for doing work in on target
+ command: mktemp -td openshift-prometheus-ansible-XXXXXX
+ register: mktemp
+ changed_when: False
+
+- set_fact:
+ tempdir: "{{ mktemp.stdout }}"
+
+- name: Create templates subdirectory
+ file:
+ state: directory
+ path: "{{ tempdir }}/templates"
+ mode: 0755
+ changed_when: False
+
+- include: install_prometheus.yaml
+ vars:
+ state: "{{ openshift_prometheus_state }}"
+
+- name: Delete temp directory
+ file:
+ name: "{{ tempdir }}"
+ state: absent
+ changed_when: False
diff --git a/roles/openshift_prometheus/tasks/nfs.yaml b/roles/openshift_prometheus/tasks/nfs.yaml
new file mode 100644
index 000000000..0b45f2cee
--- /dev/null
+++ b/roles/openshift_prometheus/tasks/nfs.yaml
@@ -0,0 +1,44 @@
+---
+# Tasks to statically provision NFS volumes
+# Include if not using dynamic volume provisioning
+- name: Ensure the /exports/ directory exists
+ file:
+ path: /exports/
+ state: directory
+ mode: 0755
+ owner: root
+ group: root
+
+- name: Ensure the prom-pv0X export directories exist
+ file:
+ path: "/exports/{{ item }}"
+ state: directory
+ mode: 0777
+ owner: nfsnobody
+ group: nfsnobody
+ with_items: "{{ openshift_prometheus_pv_exports }}"
+
+- name: Ensure the NFS exports for Prometheus PVs exist
+ copy:
+ src: openshift_prometheus.exports
+ dest: /etc/exports.d/openshift_prometheus.exports
+ register: nfs_exports_updated
+
+- name: Ensure the NFS export table is refreshed if exports were added
+ command: exportfs -ar
+ when:
+ - nfs_exports_updated.changed
+
+
+######################################################################
+# Create the required Prometheus PVs. Check out these online docs if you
+# need a refresher on includes looping with items:
+# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0
+# * http://stackoverflow.com/a/35128533
+#
+# TODO: Handle the case where a PV template is updated in
+# openshift-ansible and the change needs to be landed on the managed
+# cluster.
+
+- include: create_pvs.yaml
+ with_items: "{{ openshift_prometheus_pv_data }}"
diff --git a/roles/openshift_prometheus/templates/alertmanager.yml.j2 b/roles/openshift_prometheus/templates/alertmanager.yml.j2
new file mode 100644
index 000000000..6c432a3d0
--- /dev/null
+++ b/roles/openshift_prometheus/templates/alertmanager.yml.j2
@@ -0,0 +1,20 @@
+global:
+
+# The root route on which each incoming alert enters.
+route:
+ # default route if none match
+ receiver: alert-buffer-wh
+
+ # The labels by which incoming alerts are grouped together. For example,
+ # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+ # be batched into a single group.
+ # TODO:
+ group_by: []
+
+ # All the above attributes are inherited by all child routes and can
+ # overwritten on each.
+
+receivers:
+- name: alert-buffer-wh
+ webhook_configs:
+ - url: http://localhost:9099/topics/alerts
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
new file mode 100644
index 000000000..55a5e19c3
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-alertbuffer
+ labels:
+ storage: prometheus-alertbuffer
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus-alertbuffer
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
new file mode 100644
index 000000000..4ee518735
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-alertmanager
+ labels:
+ storage: prometheus-alertmanager
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus-alertmanager
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
new file mode 100644
index 000000000..933bf0f60
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus
+ labels:
+ storage: prometheus
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prometheus.rules.j2 b/roles/openshift_prometheus/templates/prometheus.rules.j2
new file mode 100644
index 000000000..e861dc127
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.rules.j2
@@ -0,0 +1,4 @@
+groups:
+- name: example-rules
+ interval: 30s # defaults to global interval
+ rules:
diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2
new file mode 100644
index 000000000..63430f834
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,174 @@
+rule_files:
+ - 'prometheus.rules'
+{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
+ - 'prometheus.additional.rules'
+{% endif %}
+
+
+
+# A scrape configuration for running Prometheus on a Kubernetes cluster.
+# This uses separate scrape configs for cluster components (i.e. API server, node)
+# and services to allow each to use different authentication configs.
+#
+# Kubernetes labels will be added as Prometheus labels on metrics via the
+# `labelmap` relabeling action.
+
+# Scrape config for API servers.
+#
+# Kubernetes exposes API servers as endpoints to the default/kubernetes
+# service so this uses `endpoints` role and uses relabelling to only keep
+# the endpoints associated with the default/kubernetes service using the
+# default named port `https`. This works for single API server deployments as
+# well as HA API server deployments.
+scrape_configs:
+- job_name: 'kubernetes-apiservers'
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ # Keep only the default/kubernetes service endpoints for the https port. This
+ # will add targets for each API server which Kubernetes adds an endpoint to
+ # the default/kubernetes service.
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+ action: keep
+ regex: default;kubernetes;https
+
+# Scrape config for nodes.
+#
+# Each node exposes a /metrics endpoint that contains operational metrics for
+# the Kubelet and other components.
+- job_name: 'kubernetes-nodes'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ kubernetes_sd_configs:
+ - role: node
+
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for controllers.
+#
+# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
+# the controllers.
+#
+# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
+# endpoints.
+- job_name: 'kubernetes-controllers'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ # Keep only the default/kubernetes service endpoints for the https port, and then
+ # set the port to 8444. This is the default configuration for the controllers on OpenShift
+ # masters.
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+ action: keep
+ regex: default;kubernetes;https
+ - source_labels: [__address__]
+ action: replace
+ target_label: __address__
+ regex: (.+)(?::\d+)
+ replacement: $1:8444
+
+# Scrape config for cAdvisor.
+#
+# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
+# reports container metrics for each running pod. Scrape those by default.
+- job_name: 'kubernetes-cadvisor'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+{% if kubernetes_version | float() >= 1.7 | float() %}
+ metrics_path: /metrics/cadvisor
+{% else %}
+ metrics_path: /metrics
+{% endif %}
+
+ kubernetes_sd_configs:
+ - role: node
+
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for service endpoints.
+#
+# The relabeling allows the actual service scrape endpoint to be configured
+# via the following annotations:
+#
+# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
+# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
+# to set this to `https` & most likely set the `tls_config` of the scrape config.
+# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
+# * `prometheus.io/port`: If the metrics are exposed on a different port to the
+# service then set this appropriately.
+- job_name: 'kubernetes-service-endpoints'
+
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ # TODO: this should be per target
+ insecure_skip_verify: true
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+ action: replace
+ target_label: __scheme__
+ regex: (https?)
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+ action: replace
+ target_label: __address__
+ regex: (.+)(?::\d+);(\d+)
+ replacement: $1:$2
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
+ action: replace
+ target_label: __basic_auth_username__
+ regex: (.+)
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
+ action: replace
+ target_label: __basic_auth_password__
+ regex: (.+)
+ - action: labelmap
+ regex: __meta_kubernetes_service_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_service_name]
+ action: replace
+ target_label: kubernetes_name
+
+alerting:
+ alertmanagers:
+ - scheme: http
+ static_configs:
+ - targets:
+ - "localhost:9093"
diff --git a/roles/openshift_prometheus/templates/prometheus_deployment.j2 b/roles/openshift_prometheus/templates/prometheus_deployment.j2
new file mode 100644
index 000000000..98c117f19
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus_deployment.j2
@@ -0,0 +1,240 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ name: prometheus
+ namespace: {{ namespace }}
+ labels:
+ app: prometheus
+spec:
+ replicas: {{ prom_replicas|default(1) }}
+ selector:
+ provider: openshift
+ matchLabels:
+ app: prometheus
+ template:
+ metadata:
+ name: prometheus
+ labels:
+ app: prometheus
+ spec:
+ serviceAccountName: prometheus
+{% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}
+ nodeSelector:
+{% for key, value in openshift_prometheus_node_selector.iteritems() %}
+ {{key}}: "{{value}}"
+{% endfor %}
+{% endif %}
+ containers:
+ # Deploy Prometheus behind an oauth proxy
+ - name: prom-proxy
+ image: "{{ openshift_prometheus_image_proxy }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_memory_requests_limit_proxy is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 8443
+ name: web
+ args:
+ - -provider=openshift
+ - -https-address=:8443
+ - -http-address=
+ - -email-domain=*
+ - -upstream=http://localhost:9090
+ - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+ - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+ - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -skip-auth-regex=^/metrics
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: prometheus-tls
+ - mountPath: /etc/proxy/secrets
+ name: prometheus-secrets
+ - mountPath: /prometheus
+ name: prometheus-data
+
+ - name: prometheus
+ args:
+ - --storage.tsdb.retention=6h
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --web.listen-address=localhost:9090
+ image: "{{ openshift_prometheus_image_prometheus }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_memory_requests is defined and openshift_prometheus_memory_requests is not none %}
+ memory: "{{openshift_prometheus_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_cpu_requests is defined and openshift_prometheus_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_memory_limit is defined and openshift_prometheus_memory_limit is not none %}
+ memory: "{{ openshift_prometheus_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_cpu_limit is defined and openshift_prometheus_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_cpu_limit}}"
+{% endif %}
+
+ volumeMounts:
+ - mountPath: /etc/prometheus
+ name: prometheus-config
+ - mountPath: /prometheus
+ name: prometheus-data
+
+ # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
+ - name: alerts-proxy
+ image: "{{ openshift_prometheus_image_proxy }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 9443
+ name: web
+ args:
+ - -provider=openshift
+ - -https-address=:9443
+ - -http-address=
+ - -email-domain=*
+ - -upstream=http://localhost:9099
+ - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+ - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+ - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: alerts-tls
+ - mountPath: /etc/proxy/secrets
+ name: alerts-secrets
+
+ - name: alert-buffer
+ args:
+ - --storage-path=/alert-buffer/messages.db
+ image: "{{ openshift_prometheus_image_alertbuffer }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_alertbuffer_memory_requests is defined and openshift_prometheus_alertbuffer_memory_requests is not none %}
+ memory: "{{openshift_prometheus_alertbuffer_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_requests is defined and openshift_prometheus_alertbuffer_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_alertbuffer_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_alertbuffer_memory_limit is defined and openshift_prometheus_alertbuffer_memory_limit is not none %}
+ memory: "{{openshift_prometheus_alertbuffer_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_limit is defined and openshift_prometheus_alertbuffer_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_alertbuffer_cpu_limit}}"
+{% endif %}
+ volumeMounts:
+ - mountPath: /alert-buffer
+ name: alert-buffer-data
+ ports:
+ - containerPort: 9099
+ name: alert-buf
+
+ - name: alertmanager
+ args:
+ - -config.file=/etc/alertmanager/alertmanager.yml
+ image: "{{ openshift_prometheus_image_alertmanager }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_alertmanager_memory_requests is defined and openshift_prometheus_alertmanager_memory_requests is not none %}
+ memory: "{{openshift_prometheus_alertmanager_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_requests is defined and openshift_prometheus_alertmanager_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_alertmanager_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_alertmanager_memory_limit is defined and openshift_prometheus_alertmanager_memory_limit is not none %}
+ memory: "{{openshift_prometheus_alertmanager_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_limit is defined and openshift_prometheus_alertmanager_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_alertmanager_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 9093
+ name: web
+ volumeMounts:
+ - mountPath: /etc/alertmanager
+ name: alertmanager-config
+ - mountPath: /alertmanager
+ name: alertmanager-data
+
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config
+ configMap:
+ defaultMode: 420
+ name: prometheus
+ - name: prometheus-secrets
+ secret:
+ secretName: prometheus-proxy
+ - name: prometheus-tls
+ secret:
+ secretName: prometheus-tls
+ - name: prometheus-data
+{% if openshift_prometheus_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}
+ - name: alertmanager-config
+ configMap:
+ defaultMode: 420
+ name: prometheus-alerts
+ - name: alerts-secrets
+ secret:
+ secretName: alerts-proxy
+ - name: alerts-tls
+ secret:
+ secretName: prometheus-alerts-tls
+ - name: alertmanager-data
+{% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_alertmanager_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}
+ - name: alert-buffer-data
+{% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_alertbuffer_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}
diff --git a/roles/openshift_prometheus/tests/inventory b/roles/openshift_prometheus/tests/inventory
new file mode 100644
index 000000000..878877b07
--- /dev/null
+++ b/roles/openshift_prometheus/tests/inventory
@@ -0,0 +1,2 @@
+localhost
+
diff --git a/roles/openshift_prometheus/tests/test.yaml b/roles/openshift_prometheus/tests/test.yaml
new file mode 100644
index 000000000..37baf573c
--- /dev/null
+++ b/roles/openshift_prometheus/tests/test.yaml
@@ -0,0 +1,5 @@
+---
+- hosts: localhost
+ remote_user: root
+ roles:
+ - openshift_prometheus